All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] x86/srso: Reduce overhead of the mitigation
@ 2023-08-21 11:27 Andrew Cooper
  2023-08-21 11:27 ` [PATCH 1/4] x86/srso: Rename srso_alias_*() to srso_fam19_*() Andrew Cooper
                   ` (3 more replies)
  0 siblings, 4 replies; 20+ messages in thread
From: Andrew Cooper @ 2023-08-21 11:27 UTC (permalink / raw)
  To: LKML
  Cc: Andrew Cooper, x86, Borislav Petkov, Peter Zijlstra,
	Josh Poimboeuf, Babu Moger, David.Kaplan, Nikolay Borisov, gregkh,
	Thomas Gleixner

The main point of this series is patch 4 to remove one taken branch from every
function return.  Everything else is cleanup.

Patch 3 has an issue that sadly may invalidate this as a technique.  Patch 4
needs some objtool whispering to fix.

Based on x86/urgent but I suspect this may want rebasing around other fixes in
flight.

Andrew Cooper (4):
  x86/srso: Rename srso_alias_*() to srso_fam19_*()
  x86/srso: Rename fam17 SRSO infrastructure to srso_fam17_*()
  x86/ret-thunk: Support CALL-ing to the ret-thunk
  x86/srso: Use CALL-based return thunks to reduce overhead

 arch/x86/include/asm/nospec-branch.h |  9 ++--
 arch/x86/kernel/alternative.c        |  4 +-
 arch/x86/kernel/cpu/bugs.c           |  9 ++--
 arch/x86/kernel/ftrace.c             |  8 +--
 arch/x86/kernel/static_call.c        | 10 ++--
 arch/x86/kernel/vmlinux.lds.S        | 10 ++--
 arch/x86/lib/retpoline.S             | 75 ++++++++++++----------------
 arch/x86/net/bpf_jit_comp.c          |  5 +-
 8 files changed, 67 insertions(+), 63 deletions(-)


base-commit: 6405b72e8d17bd1875a56ae52d23ec3cd51b9d66
-- 
2.30.2


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/4] x86/srso: Rename srso_alias_*() to srso_fam19_*()
  2023-08-21 11:27 [PATCH 0/4] x86/srso: Reduce overhead of the mitigation Andrew Cooper
@ 2023-08-21 11:27 ` Andrew Cooper
  2023-09-13 13:46   ` Borislav Petkov
  2023-08-21 11:27 ` [PATCH 2/4] x86/srso: Rename fam17 SRSO infrastructure to srso_fam17_*() Andrew Cooper
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 20+ messages in thread
From: Andrew Cooper @ 2023-08-21 11:27 UTC (permalink / raw)
  To: LKML
  Cc: Andrew Cooper, x86, Borislav Petkov, Peter Zijlstra,
	Josh Poimboeuf, Babu Moger, David.Kaplan, Nikolay Borisov, gregkh,
	Thomas Gleixner

The 'alias' name name is an internal detail of how the logic works.  Rename it
to state which microarchitecture is is applicable to.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: x86@kernel.org
CC: linux-kernel@vger.kernel.org
CC: Borislav Petkov <bp@alien8.de>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Josh Poimboeuf <jpoimboe@kernel.org>
CC: Babu Moger <babu.moger@amd.com>
CC: David.Kaplan@amd.com
CC: Nikolay Borisov <nik.borisov@suse.com>
CC: gregkh@linuxfoundation.org
CC: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/nospec-branch.h |  4 ++--
 arch/x86/kernel/cpu/bugs.c           |  2 +-
 arch/x86/kernel/vmlinux.lds.S        |  8 +++----
 arch/x86/lib/retpoline.S             | 34 ++++++++++++++--------------
 4 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index c55cc243592e..93e8de0bf94e 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -350,11 +350,11 @@ static inline void __x86_return_thunk(void) {}
 
 extern void retbleed_return_thunk(void);
 extern void srso_return_thunk(void);
-extern void srso_alias_return_thunk(void);
+extern void srso_fam19_return_thunk(void);
 
 extern void retbleed_untrain_ret(void);
 extern void srso_untrain_ret(void);
-extern void srso_alias_untrain_ret(void);
+extern void srso_fam19_untrain_ret(void);
 
 extern void entry_untrain_ret(void);
 extern void entry_ibpb(void);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index f081d26616ac..92bec0d719ce 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -2464,7 +2464,7 @@ static void __init srso_select_mitigation(void)
 
 			if (boot_cpu_data.x86 == 0x19) {
 				setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS);
-				x86_return_thunk = srso_alias_return_thunk;
+				x86_return_thunk = srso_fam19_return_thunk;
 			} else {
 				setup_force_cpu_cap(X86_FEATURE_SRSO);
 				x86_return_thunk = srso_return_thunk;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 83d41c2601d7..c9b6f8b83187 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -147,10 +147,10 @@ SECTIONS
 
 #ifdef CONFIG_CPU_SRSO
 		/*
-		 * See the comment above srso_alias_untrain_ret()'s
+		 * See the comment above srso_fam19_untrain_ret()'s
 		 * definition.
 		 */
-		. = srso_alias_untrain_ret | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20);
+		. = srso_fam19_untrain_ret | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20);
 		*(.text..__x86.rethunk_safe)
 #endif
 		ALIGN_ENTRY_TEXT_END
@@ -536,8 +536,8 @@ INIT_PER_CPU(irq_stack_backing_store);
  * Instead do: (A | B) - (A & B) in order to compute the XOR
  * of the two function addresses:
  */
-. = ASSERT(((ABSOLUTE(srso_alias_untrain_ret) | srso_alias_safe_ret) -
-		(ABSOLUTE(srso_alias_untrain_ret) & srso_alias_safe_ret)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)),
+. = ASSERT(((ABSOLUTE(srso_fam19_untrain_ret) | srso_fam19_safe_ret) -
+		(ABSOLUTE(srso_fam19_untrain_ret) & srso_fam19_safe_ret)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)),
 		"SRSO function pair won't alias");
 #endif
 
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index cd86aeb5fdd3..772757ea26a7 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -133,58 +133,58 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
 #ifdef CONFIG_RETHUNK
 
 /*
- * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
+ * srso_fam19_untrain_ret() and srso_fam19_safe_ret() are placed at
  * special addresses:
  *
- * - srso_alias_untrain_ret() is 2M aligned
- * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14
+ * - srso_fam19_untrain_ret() is 2M aligned
+ * - srso_fam19_safe_ret() is also in the same 2M page but bits 2, 8, 14
  * and 20 in its virtual address are set (while those bits in the
- * srso_alias_untrain_ret() function are cleared).
+ * srso_fam19_untrain_ret() function are cleared).
  *
  * This guarantees that those two addresses will alias in the branch
  * target buffer of Zen3/4 generations, leading to any potential
  * poisoned entries at that BTB slot to get evicted.
  *
- * As a result, srso_alias_safe_ret() becomes a safe return.
+ * As a result, srso_fam19_safe_ret() becomes a safe return.
  */
 #ifdef CONFIG_CPU_SRSO
 	.section .text..__x86.rethunk_untrain
 
-SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+SYM_START(srso_fam19_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
 	UNWIND_HINT_FUNC
 	ANNOTATE_NOENDBR
 	ASM_NOP2
 	lfence
-	jmp srso_alias_return_thunk
-SYM_FUNC_END(srso_alias_untrain_ret)
-__EXPORT_THUNK(srso_alias_untrain_ret)
+	jmp srso_fam19_return_thunk
+SYM_FUNC_END(srso_fam19_untrain_ret)
+__EXPORT_THUNK(srso_fam19_untrain_ret)
 
 	.section .text..__x86.rethunk_safe
 #else
 /* dummy definition for alternatives */
-SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+SYM_START(srso_fam19_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
 	ANNOTATE_UNRET_SAFE
 	ret
 	int3
-SYM_FUNC_END(srso_alias_untrain_ret)
+SYM_FUNC_END(srso_fam19_untrain_ret)
 #endif
 
-SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
+SYM_START(srso_fam19_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
 	lea 8(%_ASM_SP), %_ASM_SP
 	UNWIND_HINT_FUNC
 	ANNOTATE_UNRET_SAFE
 	ret
 	int3
-SYM_FUNC_END(srso_alias_safe_ret)
+SYM_FUNC_END(srso_fam19_safe_ret)
 
 	.section .text..__x86.return_thunk
 
-SYM_CODE_START(srso_alias_return_thunk)
+SYM_CODE_START(srso_fam19_return_thunk)
 	UNWIND_HINT_FUNC
 	ANNOTATE_NOENDBR
-	call srso_alias_safe_ret
+	call srso_fam19_safe_ret
 	ud2
-SYM_CODE_END(srso_alias_return_thunk)
+SYM_CODE_END(srso_fam19_return_thunk)
 
 /*
  * Some generic notes on the untraining sequences:
@@ -311,7 +311,7 @@ SYM_CODE_END(srso_return_thunk)
 SYM_FUNC_START(entry_untrain_ret)
 	ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
 		      "jmp srso_untrain_ret", X86_FEATURE_SRSO, \
-		      "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
+		      "jmp srso_fam19_untrain_ret", X86_FEATURE_SRSO_ALIAS
 SYM_FUNC_END(entry_untrain_ret)
 __EXPORT_THUNK(entry_untrain_ret)
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/4] x86/srso: Rename fam17 SRSO infrastructure to srso_fam17_*()
  2023-08-21 11:27 [PATCH 0/4] x86/srso: Reduce overhead of the mitigation Andrew Cooper
  2023-08-21 11:27 ` [PATCH 1/4] x86/srso: Rename srso_alias_*() to srso_fam19_*() Andrew Cooper
@ 2023-08-21 11:27 ` Andrew Cooper
  2023-09-13 13:15   ` Peter Zijlstra
  2023-08-21 11:27 ` [PATCH RFC 3/4] x86/ret-thunk: Support CALL-ing to the ret-thunk Andrew Cooper
  2023-08-21 11:27 ` [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead Andrew Cooper
  3 siblings, 1 reply; 20+ messages in thread
From: Andrew Cooper @ 2023-08-21 11:27 UTC (permalink / raw)
  To: LKML
  Cc: Andrew Cooper, x86, Borislav Petkov, Peter Zijlstra,
	Josh Poimboeuf, Babu Moger, David.Kaplan, Nikolay Borisov, gregkh,
	Thomas Gleixner

The naming is inconsistent.  Rename it to fam17 to state the microarchitecture
it is applicable to, and to mirror the srso_fam19_*() change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: x86@kernel.org
CC: linux-kernel@vger.kernel.org
CC: Borislav Petkov <bp@alien8.de>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Josh Poimboeuf <jpoimboe@kernel.org>
CC: Babu Moger <babu.moger@amd.com>
CC: David.Kaplan@amd.com
CC: Nikolay Borisov <nik.borisov@suse.com>
CC: gregkh@linuxfoundation.org
CC: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/nospec-branch.h |  4 ++--
 arch/x86/kernel/cpu/bugs.c           |  2 +-
 arch/x86/kernel/vmlinux.lds.S        |  2 +-
 arch/x86/lib/retpoline.S             | 32 ++++++++++++++--------------
 4 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 93e8de0bf94e..a4c686bc4b1f 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -349,11 +349,11 @@ static inline void __x86_return_thunk(void) {}
 #endif
 
 extern void retbleed_return_thunk(void);
-extern void srso_return_thunk(void);
+extern void srso_fam17_return_thunk(void);
 extern void srso_fam19_return_thunk(void);
 
 extern void retbleed_untrain_ret(void);
-extern void srso_untrain_ret(void);
+extern void srso_fam17_untrain_ret(void);
 extern void srso_fam19_untrain_ret(void);
 
 extern void entry_untrain_ret(void);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 92bec0d719ce..893d14a9f282 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -2467,7 +2467,7 @@ static void __init srso_select_mitigation(void)
 				x86_return_thunk = srso_fam19_return_thunk;
 			} else {
 				setup_force_cpu_cap(X86_FEATURE_SRSO);
-				x86_return_thunk = srso_return_thunk;
+				x86_return_thunk = srso_fam17_return_thunk;
 			}
 			srso_mitigation = SRSO_MITIGATION_SAFE_RET;
 		} else {
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index c9b6f8b83187..127ccdbf6d95 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -522,7 +522,7 @@ INIT_PER_CPU(irq_stack_backing_store);
 
 #ifdef CONFIG_RETHUNK
 . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
-. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
+. = ASSERT((srso_fam17_safe_ret & 0x3f) == 0, "srso_fam17_safe_ret not cacheline-aligned");
 #endif
 
 #ifdef CONFIG_CPU_SRSO
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 772757ea26a7..d8732ae21122 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -194,13 +194,13 @@ SYM_CODE_END(srso_fam19_return_thunk)
  *
  * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the
  * Retbleed sequence because the return sequence done there
- * (srso_safe_ret()) is longer and the return sequence must fully nest
+ * (srso_fam17_safe_ret()) is longer and the return sequence must fully nest
  * (end before) the untraining sequence. Therefore, the untraining
  * sequence must fully overlap the return sequence.
  *
  * Regarding alignment - the instructions which need to be untrained,
  * must all start at a cacheline boundary for Zen1/2 generations. That
- * is, instruction sequences starting at srso_safe_ret() and
+ * is, instruction sequences starting at srso_fam17_safe_ret() and
  * the respective instruction sequences at retbleed_return_thunk()
  * must start at a cacheline boundary.
  */
@@ -268,49 +268,49 @@ __EXPORT_THUNK(retbleed_untrain_ret)
 
 /*
  * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
- * above. On kernel entry, srso_untrain_ret() is executed which is a
+ * above. On kernel entry, srso_fam17_untrain_ret() is executed which is a
  *
  * movabs $0xccccc30824648d48,%rax
  *
- * and when the return thunk executes the inner label srso_safe_ret()
+ * and when the return thunk executes the inner label srso_fam17_safe_ret()
  * later, it is a stack manipulation and a RET which is mispredicted and
  * thus a "safe" one to use.
  */
 	.align 64
-	.skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
-SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+	.skip 64 - (srso_fam17_safe_ret - srso_fam17_untrain_ret), 0xcc
+SYM_START(srso_fam17_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
 	ANNOTATE_NOENDBR
 	.byte 0x48, 0xb8
 
 /*
  * This forces the function return instruction to speculate into a trap
- * (UD2 in srso_return_thunk() below).  This RET will then mispredict
+ * (UD2 in srso_fam17_return_thunk() below).  This RET will then mispredict
  * and execution will continue at the return site read from the top of
  * the stack.
  */
-SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
+SYM_INNER_LABEL(srso_fam17_safe_ret, SYM_L_GLOBAL)
 	lea 8(%_ASM_SP), %_ASM_SP
 	ret
 	int3
 	int3
 	/* end of movabs */
 	lfence
-	call srso_safe_ret
+	call srso_fam17_safe_ret
 	ud2
-SYM_CODE_END(srso_safe_ret)
-SYM_FUNC_END(srso_untrain_ret)
-__EXPORT_THUNK(srso_untrain_ret)
+SYM_CODE_END(srso_fam17_safe_ret)
+SYM_FUNC_END(srso_fam17_untrain_ret)
+__EXPORT_THUNK(srso_fam17_untrain_ret)
 
-SYM_CODE_START(srso_return_thunk)
+SYM_CODE_START(srso_fam17_return_thunk)
 	UNWIND_HINT_FUNC
 	ANNOTATE_NOENDBR
-	call srso_safe_ret
+	call srso_fam17_safe_ret
 	ud2
-SYM_CODE_END(srso_return_thunk)
+SYM_CODE_END(srso_fam17_return_thunk)
 
 SYM_FUNC_START(entry_untrain_ret)
 	ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
-		      "jmp srso_untrain_ret", X86_FEATURE_SRSO, \
+		      "jmp srso_fam17_untrain_ret", X86_FEATURE_SRSO, \
 		      "jmp srso_fam19_untrain_ret", X86_FEATURE_SRSO_ALIAS
 SYM_FUNC_END(entry_untrain_ret)
 __EXPORT_THUNK(entry_untrain_ret)
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH RFC 3/4] x86/ret-thunk: Support CALL-ing to the ret-thunk
  2023-08-21 11:27 [PATCH 0/4] x86/srso: Reduce overhead of the mitigation Andrew Cooper
  2023-08-21 11:27 ` [PATCH 1/4] x86/srso: Rename srso_alias_*() to srso_fam19_*() Andrew Cooper
  2023-08-21 11:27 ` [PATCH 2/4] x86/srso: Rename fam17 SRSO infrastructure to srso_fam17_*() Andrew Cooper
@ 2023-08-21 11:27 ` Andrew Cooper
  2023-08-21 11:27 ` [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead Andrew Cooper
  3 siblings, 0 replies; 20+ messages in thread
From: Andrew Cooper @ 2023-08-21 11:27 UTC (permalink / raw)
  To: LKML
  Cc: Andrew Cooper, x86, Borislav Petkov, Peter Zijlstra,
	Josh Poimboeuf, Babu Moger, David.Kaplan, Nikolay Borisov, gregkh,
	Thomas Gleixner

This will be used to improve the SRSO mitigation.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: x86@kernel.org
CC: linux-kernel@vger.kernel.org
CC: Borislav Petkov <bp@alien8.de>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Josh Poimboeuf <jpoimboe@kernel.org>
CC: Babu Moger <babu.moger@amd.com>
CC: David.Kaplan@amd.com
CC: Nikolay Borisov <nik.borisov@suse.com>
CC: gregkh@linuxfoundation.org
CC: Thomas Gleixner <tglx@linutronix.de>

RFC: __static_call_transform() with Jcc interpreted as RET isn't safe with a
transformation to CALL.  Where does this pattern come from?
---
 arch/x86/include/asm/nospec-branch.h |  1 +
 arch/x86/kernel/alternative.c        |  4 +++-
 arch/x86/kernel/cpu/bugs.c           |  1 +
 arch/x86/kernel/ftrace.c             |  8 +++++---
 arch/x86/kernel/static_call.c        | 10 ++++++----
 arch/x86/net/bpf_jit_comp.c          |  5 ++++-
 6 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index a4c686bc4b1f..5d5677bcf749 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -360,6 +360,7 @@ extern void entry_untrain_ret(void);
 extern void entry_ibpb(void);
 
 extern void (*x86_return_thunk)(void);
+extern bool x86_return_thunk_use_call;
 
 #ifdef CONFIG_CALL_DEPTH_TRACKING
 extern void __x86_return_skl(void);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 099d58d02a26..215793fa53f5 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -704,8 +704,10 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes)
 
 	/* Patch the custom return thunks... */
 	if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
+		u8 op = x86_return_thunk_use_call ? CALL_INSN_OPCODE : JMP32_INSN_OPCODE;
+
 		i = JMP32_INSN_SIZE;
-		__text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i);
+		__text_gen_insn(bytes, op, addr, x86_return_thunk, i);
 	} else {
 		/* ... or patch them out if not needed. */
 		bytes[i++] = RET_INSN_OPCODE;
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 893d14a9f282..de2f84aa526f 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -64,6 +64,7 @@ EXPORT_SYMBOL_GPL(x86_pred_cmd);
 static DEFINE_MUTEX(spec_ctrl_mutex);
 
 void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
+bool x86_return_thunk_use_call __ro_after_init;
 
 /* Update SPEC_CTRL MSR and its cached copy unconditionally */
 static void update_spec_ctrl(u64 val)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 12df54ff0e81..f383e4a90ce2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -363,9 +363,11 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 		goto fail;
 
 	ip = trampoline + size;
-	if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
-		__text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE);
-	else
+	if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
+		u8 op = x86_return_thunk_use_call ? CALL_INSN_OPCODE : JMP32_INSN_OPCODE;
+
+		__text_gen_insn(ip, op, ip, x86_return_thunk, JMP32_INSN_SIZE);
+	} else
 		memcpy(ip, retq, sizeof(retq));
 
 	/* No need to test direct calls on created trampolines */
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index 77a9316da435..b8ff0fdfa49e 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -81,9 +81,11 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
 		break;
 
 	case RET:
-		if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
-			code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk);
-		else
+		if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
+			u8 op = x86_return_thunk_use_call ? CALL_INSN_OPCODE : JMP32_INSN_OPCODE;
+
+			code = text_gen_insn(op, insn, x86_return_thunk);
+		} else
 			code = &retinsn;
 		break;
 
@@ -91,7 +93,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
 		if (!func) {
 			func = __static_call_return;
 			if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
-				func = x86_return_thunk;
+				func = x86_return_thunk; /* XXX */
 		}
 
 		buf[0] = 0x0f;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 438adb695daa..8e61a97b6d67 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -443,7 +443,10 @@ static void emit_return(u8 **pprog, u8 *ip)
 	u8 *prog = *pprog;
 
 	if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
-		emit_jump(&prog, x86_return_thunk, ip);
+		if (x86_return_thunk_use_call)
+			emit_call(&prog, x86_return_thunk, ip);
+		else
+			emit_jump(&prog, x86_return_thunk, ip);
 	} else {
 		EMIT1(0xC3);		/* ret */
 		if (IS_ENABLED(CONFIG_SLS))
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead
  2023-08-21 11:27 [PATCH 0/4] x86/srso: Reduce overhead of the mitigation Andrew Cooper
                   ` (2 preceding siblings ...)
  2023-08-21 11:27 ` [PATCH RFC 3/4] x86/ret-thunk: Support CALL-ing to the ret-thunk Andrew Cooper
@ 2023-08-21 11:27 ` Andrew Cooper
  2023-08-21 15:16   ` Josh Poimboeuf
                     ` (3 more replies)
  3 siblings, 4 replies; 20+ messages in thread
From: Andrew Cooper @ 2023-08-21 11:27 UTC (permalink / raw)
  To: LKML
  Cc: Andrew Cooper, x86, Borislav Petkov, Peter Zijlstra,
	Josh Poimboeuf, Babu Moger, David.Kaplan, Nikolay Borisov, gregkh,
	Thomas Gleixner

The SRSO safety depends on having a CALL to an {ADD,LEA}/RET sequence which
has been made safe in the BTB.  Specifically, there needs to be no pertubance
to the RAS between a correctly predicted CALL and the subsequent RET.

Use the new infrastructure to CALL to a return thunk.  Remove
srso_fam1?_safe_ret() symbols and point srso_fam1?_return_thunk().

This removes one taken branch from every function return, which will reduce
the overhead of the mitigation.  It also removes one of three moving pieces
from the SRSO mess.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: x86@kernel.org
CC: linux-kernel@vger.kernel.org
CC: Borislav Petkov <bp@alien8.de>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Josh Poimboeuf <jpoimboe@kernel.org>
CC: Babu Moger <babu.moger@amd.com>
CC: David.Kaplan@amd.com
CC: Nikolay Borisov <nik.borisov@suse.com>
CC: gregkh@linuxfoundation.org
CC: Thomas Gleixner <tglx@linutronix.de>

RFC:

  vmlinux.o: warning: objtool: srso_fam17_return_thunk(): can't find starting instruction

Any objtool whisperers know what's going on, and particularly why
srso_fam19_return_thunk() appears to be happy?

Also, depends on the resolution of the RFC in the previous patch.
---
 arch/x86/kernel/cpu/bugs.c    |  4 ++-
 arch/x86/kernel/vmlinux.lds.S |  6 ++---
 arch/x86/lib/retpoline.S      | 47 ++++++++++++++---------------------
 3 files changed, 25 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index de2f84aa526f..c4d580b485a7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -2458,8 +2458,10 @@ static void __init srso_select_mitigation(void)
 		if (IS_ENABLED(CONFIG_CPU_SRSO)) {
 			/*
 			 * Enable the return thunk for generated code
-			 * like ftrace, static_call, etc.
+			 * like ftrace, static_call, etc.  These
+			 * ret-thunks need to call to their target.
 			 */
+			x86_return_thunk_use_call = true;
 			setup_force_cpu_cap(X86_FEATURE_RETHUNK);
 			setup_force_cpu_cap(X86_FEATURE_UNRET);
 
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 127ccdbf6d95..ed7d4020c2b4 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -522,7 +522,7 @@ INIT_PER_CPU(irq_stack_backing_store);
 
 #ifdef CONFIG_RETHUNK
 . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
-. = ASSERT((srso_fam17_safe_ret & 0x3f) == 0, "srso_fam17_safe_ret not cacheline-aligned");
+. = ASSERT((srso_fam17_return_thunk & 0x3f) == 0, "srso_fam17_return_thunk not cacheline-aligned");
 #endif
 
 #ifdef CONFIG_CPU_SRSO
@@ -536,8 +536,8 @@ INIT_PER_CPU(irq_stack_backing_store);
  * Instead do: (A | B) - (A & B) in order to compute the XOR
  * of the two function addresses:
  */
-. = ASSERT(((ABSOLUTE(srso_fam19_untrain_ret) | srso_fam19_safe_ret) -
-		(ABSOLUTE(srso_fam19_untrain_ret) & srso_fam19_safe_ret)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)),
+. = ASSERT(((ABSOLUTE(srso_fam19_untrain_ret) | srso_fam19_return_thunk) -
+		(ABSOLUTE(srso_fam19_untrain_ret) & srso_fam19_return_thunk)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)),
 		"SRSO function pair won't alias");
 #endif
 
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index d8732ae21122..2b1c92632158 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -133,11 +133,11 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
 #ifdef CONFIG_RETHUNK
 
 /*
- * srso_fam19_untrain_ret() and srso_fam19_safe_ret() are placed at
+ * srso_fam19_untrain_ret() and srso_fam19_return_thunk() are placed at
  * special addresses:
  *
  * - srso_fam19_untrain_ret() is 2M aligned
- * - srso_fam19_safe_ret() is also in the same 2M page but bits 2, 8, 14
+ * - srso_fam19_return_thunk() is also in the same 2M page but bits 2, 8, 14
  * and 20 in its virtual address are set (while those bits in the
  * srso_fam19_untrain_ret() function are cleared).
  *
@@ -145,7 +145,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
  * target buffer of Zen3/4 generations, leading to any potential
  * poisoned entries at that BTB slot to get evicted.
  *
- * As a result, srso_fam19_safe_ret() becomes a safe return.
+ * As a result, srso_fam19_return_thunk() becomes a safe return.
  */
 #ifdef CONFIG_CPU_SRSO
 	.section .text..__x86.rethunk_untrain
@@ -155,7 +155,8 @@ SYM_START(srso_fam19_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
 	ANNOTATE_NOENDBR
 	ASM_NOP2
 	lfence
-	jmp srso_fam19_return_thunk
+	call srso_fam19_return_thunk
+	ud2
 SYM_FUNC_END(srso_fam19_untrain_ret)
 __EXPORT_THUNK(srso_fam19_untrain_ret)
 
@@ -169,23 +170,17 @@ SYM_START(srso_fam19_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
 SYM_FUNC_END(srso_fam19_untrain_ret)
 #endif
 
-SYM_START(srso_fam19_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
-	lea 8(%_ASM_SP), %_ASM_SP
+SYM_START(srso_fam19_return_thunk, SYM_L_GLOBAL, SYM_A_NONE)
 	UNWIND_HINT_FUNC
+	ANNOTATE_NOENDBR
+	lea 8(%_ASM_SP), %_ASM_SP
 	ANNOTATE_UNRET_SAFE
 	ret
 	int3
-SYM_FUNC_END(srso_fam19_safe_ret)
+SYM_FUNC_END(srso_fam19_return_thunk)
 
 	.section .text..__x86.return_thunk
 
-SYM_CODE_START(srso_fam19_return_thunk)
-	UNWIND_HINT_FUNC
-	ANNOTATE_NOENDBR
-	call srso_fam19_safe_ret
-	ud2
-SYM_CODE_END(srso_fam19_return_thunk)
-
 /*
  * Some generic notes on the untraining sequences:
  *
@@ -194,13 +189,13 @@ SYM_CODE_END(srso_fam19_return_thunk)
  *
  * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the
  * Retbleed sequence because the return sequence done there
- * (srso_fam17_safe_ret()) is longer and the return sequence must fully nest
+ * (srso_fam17_return_thunk()) is longer and the return sequence must fully nest
  * (end before) the untraining sequence. Therefore, the untraining
  * sequence must fully overlap the return sequence.
  *
  * Regarding alignment - the instructions which need to be untrained,
  * must all start at a cacheline boundary for Zen1/2 generations. That
- * is, instruction sequences starting at srso_fam17_safe_ret() and
+ * is, instruction sequences starting at srso_fam17_return_thunk() and
  * the respective instruction sequences at retbleed_return_thunk()
  * must start at a cacheline boundary.
  */
@@ -272,12 +267,12 @@ __EXPORT_THUNK(retbleed_untrain_ret)
  *
  * movabs $0xccccc30824648d48,%rax
  *
- * and when the return thunk executes the inner label srso_fam17_safe_ret()
+ * and when the return thunk executes the inner label srso_fam17_return_thunk()
  * later, it is a stack manipulation and a RET which is mispredicted and
  * thus a "safe" one to use.
  */
 	.align 64
-	.skip 64 - (srso_fam17_safe_ret - srso_fam17_untrain_ret), 0xcc
+	.skip 64 - (srso_fam17_return_thunk - srso_fam17_untrain_ret), 0xcc
 SYM_START(srso_fam17_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
 	ANNOTATE_NOENDBR
 	.byte 0x48, 0xb8
@@ -288,26 +283,22 @@ SYM_START(srso_fam17_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
  * and execution will continue at the return site read from the top of
  * the stack.
  */
-SYM_INNER_LABEL(srso_fam17_safe_ret, SYM_L_GLOBAL)
+SYM_INNER_LABEL(srso_fam17_return_thunk, SYM_L_GLOBAL)
+	UNWIND_HINT_FUNC
+	ANNOTATE_NOENDBR
 	lea 8(%_ASM_SP), %_ASM_SP
+	ANNOTATE_UNRET_SAFE
 	ret
 	int3
 	int3
 	/* end of movabs */
 	lfence
-	call srso_fam17_safe_ret
+	call srso_fam17_return_thunk
 	ud2
-SYM_CODE_END(srso_fam17_safe_ret)
+SYM_CODE_END(srso_fam17_return_thunk)
 SYM_FUNC_END(srso_fam17_untrain_ret)
 __EXPORT_THUNK(srso_fam17_untrain_ret)
 
-SYM_CODE_START(srso_fam17_return_thunk)
-	UNWIND_HINT_FUNC
-	ANNOTATE_NOENDBR
-	call srso_fam17_safe_ret
-	ud2
-SYM_CODE_END(srso_fam17_return_thunk)
-
 SYM_FUNC_START(entry_untrain_ret)
 	ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
 		      "jmp srso_fam17_untrain_ret", X86_FEATURE_SRSO, \
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead
  2023-08-21 11:27 ` [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead Andrew Cooper
@ 2023-08-21 15:16   ` Josh Poimboeuf
  2023-08-21 23:01     ` Andrew Cooper
  2023-08-21 21:19   ` kernel test robot
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 20+ messages in thread
From: Josh Poimboeuf @ 2023-08-21 15:16 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: LKML, x86, Borislav Petkov, Peter Zijlstra, Babu Moger,
	David.Kaplan, Nikolay Borisov, gregkh, Thomas Gleixner

On Mon, Aug 21, 2023 at 12:27:23PM +0100, Andrew Cooper wrote:
> The SRSO safety depends on having a CALL to an {ADD,LEA}/RET sequence which
> has been made safe in the BTB.  Specifically, there needs to be no pertubance
> to the RAS between a correctly predicted CALL and the subsequent RET.
> 
> Use the new infrastructure to CALL to a return thunk.  Remove
> srso_fam1?_safe_ret() symbols and point srso_fam1?_return_thunk().
> 
> This removes one taken branch from every function return, which will reduce
> the overhead of the mitigation.  It also removes one of three moving pieces
> from the SRSO mess.

So, the address of whatever instruction comes after the 'CALL
srso_*_return_thunk' is added to the RSB/RAS, and that might be
speculated to when the thunk returns.  Is that a concern?

> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> CC: x86@kernel.org
> CC: linux-kernel@vger.kernel.org
> CC: Borislav Petkov <bp@alien8.de>
> CC: Peter Zijlstra <peterz@infradead.org>
> CC: Josh Poimboeuf <jpoimboe@kernel.org>
> CC: Babu Moger <babu.moger@amd.com>
> CC: David.Kaplan@amd.com
> CC: Nikolay Borisov <nik.borisov@suse.com>
> CC: gregkh@linuxfoundation.org
> CC: Thomas Gleixner <tglx@linutronix.de>
> 
> RFC:
> 
>   vmlinux.o: warning: objtool: srso_fam17_return_thunk(): can't find starting instruction
> 
> Any objtool whisperers know what's going on, and particularly why
> srso_fam19_return_thunk() appears to be happy?
> 
> Also, depends on the resolution of the RFC in the previous patch.

I can take a look.

-- 
Josh

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead
@ 2023-08-21 18:06 kernel test robot
  0 siblings, 0 replies; 20+ messages in thread
From: kernel test robot @ 2023-08-21 18:06 UTC (permalink / raw)
  Cc: oe-kbuild-all, llvm

In-Reply-To: <20230821112723.3995187-5-andrew.cooper3@citrix.com>
References: <20230821112723.3995187-5-andrew.cooper3@citrix.com>
TO: Andrew Cooper <andrew.cooper3@citrix.com>

Hi Andrew,

[This is a private test report for your RFC patch.]
kernel test robot noticed the following build warnings:

[auto build test WARNING on 6405b72e8d17bd1875a56ae52d23ec3cd51b9d66]

url:    https://github.com/intel-lab-lkp/linux/commits/Andrew-Cooper/x86-srso-Rename-srso_alias_-to-srso_fam19_/20230821-192855
base:   6405b72e8d17bd1875a56ae52d23ec3cd51b9d66
patch link:    https://lore.kernel.org/r/20230821112723.3995187-5-andrew.cooper3%40citrix.com
patch subject: [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead
config: x86_64-randconfig-071-20230821 (https://download.01.org/0day-ci/archive/20230822/202308220129.AcqINgTs-lkp@intel.com/config)
compiler: clang version 16.0.4 (https://github.com/llvm/llvm-project.git ae42196bc493ffe877a7e3dff8be32035dea4d07)
reproduce: (https://download.01.org/0day-ci/archive/20230822/202308220129.AcqINgTs-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202308220129.AcqINgTs-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> vmlinux.o: warning: objtool: srso_fam17_return_thunk(): can't find starting instruction

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead
  2023-08-21 11:27 ` [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead Andrew Cooper
  2023-08-21 15:16   ` Josh Poimboeuf
@ 2023-08-21 21:19   ` kernel test robot
  2023-08-22  7:23   ` kernel test robot
  2023-09-13 13:17   ` Peter Zijlstra
  3 siblings, 0 replies; 20+ messages in thread
From: kernel test robot @ 2023-08-21 21:19 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: oe-kbuild-all

Hi Andrew,

[This is a private test report for your RFC patch.]
kernel test robot noticed the following build warnings:

[auto build test WARNING on 6405b72e8d17bd1875a56ae52d23ec3cd51b9d66]

url:    https://github.com/intel-lab-lkp/linux/commits/Andrew-Cooper/x86-srso-Rename-srso_alias_-to-srso_fam19_/20230821-192855
base:   6405b72e8d17bd1875a56ae52d23ec3cd51b9d66
patch link:    https://lore.kernel.org/r/20230821112723.3995187-5-andrew.cooper3%40citrix.com
patch subject: [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead
config: x86_64-buildonly-randconfig-001-20230822 (https://download.01.org/0day-ci/archive/20230822/202308220524.twcmMPN5-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
reproduce: (https://download.01.org/0day-ci/archive/20230822/202308220524.twcmMPN5-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202308220524.twcmMPN5-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> vmlinux.o: warning: objtool: srso_fam17_return_thunk(): can't find starting instruction

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead
  2023-08-21 15:16   ` Josh Poimboeuf
@ 2023-08-21 23:01     ` Andrew Cooper
  2023-08-22  2:22       ` Josh Poimboeuf
  0 siblings, 1 reply; 20+ messages in thread
From: Andrew Cooper @ 2023-08-21 23:01 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: LKML, x86, Borislav Petkov, Peter Zijlstra, Babu Moger,
	David.Kaplan, Nikolay Borisov, gregkh, Thomas Gleixner

On 21/08/2023 4:16 pm, Josh Poimboeuf wrote:
> On Mon, Aug 21, 2023 at 12:27:23PM +0100, Andrew Cooper wrote:
>> The SRSO safety depends on having a CALL to an {ADD,LEA}/RET sequence which
>> has been made safe in the BTB.  Specifically, there needs to be no pertubance
>> to the RAS between a correctly predicted CALL and the subsequent RET.
>>
>> Use the new infrastructure to CALL to a return thunk.  Remove
>> srso_fam1?_safe_ret() symbols and point srso_fam1?_return_thunk().
>>
>> This removes one taken branch from every function return, which will reduce
>> the overhead of the mitigation.  It also removes one of three moving pieces
>> from the SRSO mess.
> So, the address of whatever instruction comes after the 'CALL
> srso_*_return_thunk' is added to the RSB/RAS, and that might be
> speculated to when the thunk returns.  Is that a concern?

That is very intentional, and key to the safety.

Replacing a RET with a CALL/{ADD,LEA}/RET sequence is a form of
retpoline thunk.  The only difference with regular retpolines is that
the intended target is already on the stack, and not in a GPR.


If the CALL mispredicts, it doesn't matter.  When decode catches up
(allegedly either instantaneously on Fam19h, or a few cycles late on
Fam17h), the top of the RAS is corrected will point at the INT3
following the CALL instruction.

When the CALL is corrected, speculation continues at the real
destination (the {ADD,LEA}/RET sequence) where the {ADD,LEA} pops the
"wrong" return address off the stack and lets the RET take the next
address up the stack.

The RET predicts to INT3 following the call (which is safe), and
eventually gets corrected to the parent return address on the stack
which is the real intended destination.

Therefore, rogue RET speculation is always safely contained at the INT3
until the RET uop can execute, notice the mispredict, and correct to
what the stack says is correct.

Of course, relying on the fact that the {ADD,LEA}+RET sequence doesn't
have poison in the BTB, which is what the UNTRAIN_RET sequence is trying
to achieve with microarchitectural means.

>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> ---
>> CC: x86@kernel.org
>> CC: linux-kernel@vger.kernel.org
>> CC: Borislav Petkov <bp@alien8.de>
>> CC: Peter Zijlstra <peterz@infradead.org>
>> CC: Josh Poimboeuf <jpoimboe@kernel.org>
>> CC: Babu Moger <babu.moger@amd.com>
>> CC: David.Kaplan@amd.com
>> CC: Nikolay Borisov <nik.borisov@suse.com>
>> CC: gregkh@linuxfoundation.org
>> CC: Thomas Gleixner <tglx@linutronix.de>
>>
>> RFC:
>>
>>   vmlinux.o: warning: objtool: srso_fam17_return_thunk(): can't find starting instruction
>>
>> Any objtool whisperers know what's going on, and particularly why
>> srso_fam19_return_thunk() appears to be happy?
>>
>> Also, depends on the resolution of the RFC in the previous patch.
> I can take a look.

Thanks.

~Andrew

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead
  2023-08-21 23:01     ` Andrew Cooper
@ 2023-08-22  2:22       ` Josh Poimboeuf
  2023-08-22  6:45         ` Nikolay Borisov
  0 siblings, 1 reply; 20+ messages in thread
From: Josh Poimboeuf @ 2023-08-22  2:22 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: LKML, x86, Borislav Petkov, Peter Zijlstra, Babu Moger,
	David.Kaplan, Nikolay Borisov, gregkh, Thomas Gleixner

On Tue, Aug 22, 2023 at 12:01:29AM +0100, Andrew Cooper wrote:
> On 21/08/2023 4:16 pm, Josh Poimboeuf wrote:
> > On Mon, Aug 21, 2023 at 12:27:23PM +0100, Andrew Cooper wrote:
> >> The SRSO safety depends on having a CALL to an {ADD,LEA}/RET sequence which
> >> has been made safe in the BTB.  Specifically, there needs to be no pertubance
> >> to the RAS between a correctly predicted CALL and the subsequent RET.
> >>
> >> Use the new infrastructure to CALL to a return thunk.  Remove
> >> srso_fam1?_safe_ret() symbols and point srso_fam1?_return_thunk().
> >>
> >> This removes one taken branch from every function return, which will reduce
> >> the overhead of the mitigation.  It also removes one of three moving pieces
> >> from the SRSO mess.
> > So, the address of whatever instruction comes after the 'CALL
> > srso_*_return_thunk' is added to the RSB/RAS, and that might be
> > speculated to when the thunk returns.  Is that a concern?
> 
> That is very intentional, and key to the safety.
> 
> Replacing a RET with a CALL/{ADD,LEA}/RET sequence is a form of
> retpoline thunk.  The only difference with regular retpolines is that
> the intended target is already on the stack, and not in a GPR.
> 
> 
> If the CALL mispredicts, it doesn't matter.  When decode catches up
> (allegedly either instantaneously on Fam19h, or a few cycles late on
> Fam17h), the top of the RAS is corrected will point at the INT3
> following the CALL instruction.

That's the thing though, at least with my kernel/compiler combo there's
no INT3 after the JMP __x86_return_thunk, and there's no room to patch
one in after the CALL, as the JMP and CALL are both 5 bytes.

-- 
Josh

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead
  2023-08-22  2:22       ` Josh Poimboeuf
@ 2023-08-22  6:45         ` Nikolay Borisov
  2023-08-22 22:18           ` Josh Poimboeuf
  0 siblings, 1 reply; 20+ messages in thread
From: Nikolay Borisov @ 2023-08-22  6:45 UTC (permalink / raw)
  To: Josh Poimboeuf, Andrew Cooper
  Cc: LKML, x86, Borislav Petkov, Peter Zijlstra, Babu Moger,
	David.Kaplan, gregkh, Thomas Gleixner



On 22.08.23 г. 5:22 ч., Josh Poimboeuf wrote:
> On Tue, Aug 22, 2023 at 12:01:29AM +0100, Andrew Cooper wrote:
>> On 21/08/2023 4:16 pm, Josh Poimboeuf wrote:
>>> On Mon, Aug 21, 2023 at 12:27:23PM +0100, Andrew Cooper wrote:
>>>> The SRSO safety depends on having a CALL to an {ADD,LEA}/RET sequence which
>>>> has been made safe in the BTB.  Specifically, there needs to be no pertubance
>>>> to the RAS between a correctly predicted CALL and the subsequent RET.
>>>>
>>>> Use the new infrastructure to CALL to a return thunk.  Remove
>>>> srso_fam1?_safe_ret() symbols and point srso_fam1?_return_thunk().
>>>>
>>>> This removes one taken branch from every function return, which will reduce
>>>> the overhead of the mitigation.  It also removes one of three moving pieces
>>>> from the SRSO mess.
>>> So, the address of whatever instruction comes after the 'CALL
>>> srso_*_return_thunk' is added to the RSB/RAS, and that might be
>>> speculated to when the thunk returns.  Is that a concern?
>>
>> That is very intentional, and key to the safety.
>>
>> Replacing a RET with a CALL/{ADD,LEA}/RET sequence is a form of
>> retpoline thunk.  The only difference with regular retpolines is that
>> the intended target is already on the stack, and not in a GPR.
>>
>>
>> If the CALL mispredicts, it doesn't matter.  When decode catches up
>> (allegedly either instantaneously on Fam19h, or a few cycles late on
>> Fam17h), the top of the RAS is corrected will point at the INT3
>> following the CALL instruction.
> 
> That's the thing though, at least with my kernel/compiler combo there's
> no INT3 after the JMP __x86_return_thunk, and there's no room to patch
> one in after the CALL, as the JMP and CALL are both 5 bytes.

FWIW gcc's mfunction-return=thunk-return only ever generates a jmp, 
thunk/thunk-inline OTOH generates a "full fledged" thunk with all the 
necessary speculation catching tricks.

For reference:

https://godbolt.org/z/M1avYc63b
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead
  2023-08-21 11:27 ` [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead Andrew Cooper
  2023-08-21 15:16   ` Josh Poimboeuf
  2023-08-21 21:19   ` kernel test robot
@ 2023-08-22  7:23   ` kernel test robot
  2023-09-13 13:17   ` Peter Zijlstra
  3 siblings, 0 replies; 20+ messages in thread
From: kernel test robot @ 2023-08-22  7:23 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: oe-kbuild-all

Hi Andrew,

[This is a private test report for your RFC patch.]
kernel test robot noticed the following build warnings:

[auto build test WARNING on 6405b72e8d17bd1875a56ae52d23ec3cd51b9d66]

url:    https://github.com/intel-lab-lkp/linux/commits/Andrew-Cooper/x86-srso-Rename-srso_alias_-to-srso_fam19_/20230821-192855
base:   6405b72e8d17bd1875a56ae52d23ec3cd51b9d66
patch link:    https://lore.kernel.org/r/20230821112723.3995187-5-andrew.cooper3%40citrix.com
patch subject: [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead
config: x86_64-buildonly-randconfig-005-20230822 (https://download.01.org/0day-ci/archive/20230822/202308221422.3HLEiqns-lkp@intel.com/config)
compiler: gcc-7 (Ubuntu 7.5.0-6ubuntu2) 7.5.0
reproduce: (https://download.01.org/0day-ci/archive/20230822/202308221422.3HLEiqns-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202308221422.3HLEiqns-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> arch/x86/lib/retpoline.o: warning: objtool: srso_fam17_return_thunk(): can't find starting instruction

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead
  2023-08-22  6:45         ` Nikolay Borisov
@ 2023-08-22 22:18           ` Josh Poimboeuf
  2023-08-23  6:08             ` Nikolay Borisov
  2023-09-13 12:50             ` Peter Zijlstra
  0 siblings, 2 replies; 20+ messages in thread
From: Josh Poimboeuf @ 2023-08-22 22:18 UTC (permalink / raw)
  To: Nikolay Borisov
  Cc: Andrew Cooper, LKML, x86, Borislav Petkov, Peter Zijlstra,
	Babu Moger, David.Kaplan, gregkh, Thomas Gleixner

On Tue, Aug 22, 2023 at 09:45:07AM +0300, Nikolay Borisov wrote:
> 
> 
> On 22.08.23 г. 5:22 ч., Josh Poimboeuf wrote:
> > On Tue, Aug 22, 2023 at 12:01:29AM +0100, Andrew Cooper wrote:
> > > On 21/08/2023 4:16 pm, Josh Poimboeuf wrote:
> > > > On Mon, Aug 21, 2023 at 12:27:23PM +0100, Andrew Cooper wrote:
> > > > > The SRSO safety depends on having a CALL to an {ADD,LEA}/RET sequence which
> > > > > has been made safe in the BTB.  Specifically, there needs to be no pertubance
> > > > > to the RAS between a correctly predicted CALL and the subsequent RET.
> > > > > 
> > > > > Use the new infrastructure to CALL to a return thunk.  Remove
> > > > > srso_fam1?_safe_ret() symbols and point srso_fam1?_return_thunk().
> > > > > 
> > > > > This removes one taken branch from every function return, which will reduce
> > > > > the overhead of the mitigation.  It also removes one of three moving pieces
> > > > > from the SRSO mess.
> > > > So, the address of whatever instruction comes after the 'CALL
> > > > srso_*_return_thunk' is added to the RSB/RAS, and that might be
> > > > speculated to when the thunk returns.  Is that a concern?
> > > 
> > > That is very intentional, and key to the safety.
> > > 
> > > Replacing a RET with a CALL/{ADD,LEA}/RET sequence is a form of
> > > retpoline thunk.  The only difference with regular retpolines is that
> > > the intended target is already on the stack, and not in a GPR.
> > > 
> > > 
> > > If the CALL mispredicts, it doesn't matter.  When decode catches up
> > > (allegedly either instantaneously on Fam19h, or a few cycles late on
> > > Fam17h), the top of the RAS is corrected will point at the INT3
> > > following the CALL instruction.
> > 
> > That's the thing though, at least with my kernel/compiler combo there's
> > no INT3 after the JMP __x86_return_thunk, and there's no room to patch
> > one in after the CALL, as the JMP and CALL are both 5 bytes.
> 
> FWIW gcc's mfunction-return=thunk-return only ever generates a jmp,
> thunk/thunk-inline OTOH generates a "full fledged" thunk with all the
> necessary speculation catching tricks.
> 
> For reference:
> 
> https://godbolt.org/z/M1avYc63b

The problem is the call-site, not the thunk.  Ideally we'd have an
option which adds an INT3 after the 'JMP __x86_return_thunk'.

-- 
Josh

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead
  2023-08-22 22:18           ` Josh Poimboeuf
@ 2023-08-23  6:08             ` Nikolay Borisov
  2023-09-13 12:50             ` Peter Zijlstra
  1 sibling, 0 replies; 20+ messages in thread
From: Nikolay Borisov @ 2023-08-23  6:08 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Andrew Cooper, LKML, x86, Borislav Petkov, Peter Zijlstra,
	Babu Moger, David.Kaplan, gregkh, Thomas Gleixner



On 23.08.23 г. 1:18 ч., Josh Poimboeuf wrote:
> On Tue, Aug 22, 2023 at 09:45:07AM +0300, Nikolay Borisov wrote:
>>
>>
>> On 22.08.23 г. 5:22 ч., Josh Poimboeuf wrote:
>>> On Tue, Aug 22, 2023 at 12:01:29AM +0100, Andrew Cooper wrote:
>>>> On 21/08/2023 4:16 pm, Josh Poimboeuf wrote:
>>>>> On Mon, Aug 21, 2023 at 12:27:23PM +0100, Andrew Cooper wrote:
>>>>>> The SRSO safety depends on having a CALL to an {ADD,LEA}/RET sequence which
>>>>>> has been made safe in the BTB.  Specifically, there needs to be no pertubance
>>>>>> to the RAS between a correctly predicted CALL and the subsequent RET.
>>>>>>
>>>>>> Use the new infrastructure to CALL to a return thunk.  Remove
>>>>>> srso_fam1?_safe_ret() symbols and point srso_fam1?_return_thunk().
>>>>>>
>>>>>> This removes one taken branch from every function return, which will reduce
>>>>>> the overhead of the mitigation.  It also removes one of three moving pieces
>>>>>> from the SRSO mess.
>>>>> So, the address of whatever instruction comes after the 'CALL
>>>>> srso_*_return_thunk' is added to the RSB/RAS, and that might be
>>>>> speculated to when the thunk returns.  Is that a concern?
>>>>
>>>> That is very intentional, and key to the safety.
>>>>
>>>> Replacing a RET with a CALL/{ADD,LEA}/RET sequence is a form of
>>>> retpoline thunk.  The only difference with regular retpolines is that
>>>> the intended target is already on the stack, and not in a GPR.
>>>>
>>>>
>>>> If the CALL mispredicts, it doesn't matter.  When decode catches up
>>>> (allegedly either instantaneously on Fam19h, or a few cycles late on
>>>> Fam17h), the top of the RAS is corrected will point at the INT3
>>>> following the CALL instruction.
>>>
>>> That's the thing though, at least with my kernel/compiler combo there's
>>> no INT3 after the JMP __x86_return_thunk, and there's no room to patch
>>> one in after the CALL, as the JMP and CALL are both 5 bytes.
>>
>> FWIW gcc's mfunction-return=thunk-return only ever generates a jmp,
>> thunk/thunk-inline OTOH generates a "full fledged" thunk with all the
>> necessary speculation catching tricks.
>>
>> For reference:
>>
>> https://godbolt.org/z/M1avYc63b
> 
> The problem is the call-site, not the thunk.  Ideally we'd have an
> option which adds an INT3 after the 'JMP __x86_return_thunk'.

The way I see it, it seems the int3/ud2 or w/e sequence belongs to the 
thunk and not the call site (what you said). However, Andrew's solution 
depends on the callsite sort of being the thunk.

It seems something like that has already been done for the indirect 
thunk but not for return thunk:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102952
> 

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead
  2023-08-22 22:18           ` Josh Poimboeuf
  2023-08-23  6:08             ` Nikolay Borisov
@ 2023-09-13 12:50             ` Peter Zijlstra
  1 sibling, 0 replies; 20+ messages in thread
From: Peter Zijlstra @ 2023-09-13 12:50 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: Nikolay Borisov, Andrew Cooper, LKML, x86, Borislav Petkov,
	Babu Moger, David.Kaplan, gregkh, Thomas Gleixner

On Tue, Aug 22, 2023 at 03:18:28PM -0700, Josh Poimboeuf wrote:

> The problem is the call-site, not the thunk.  Ideally we'd have an
> option which adds an INT3 after the 'JMP __x86_return_thunk'.

The -mharden-sls=all option *SHOULD* be extended to unconditionally emit
INT3 after everyt JMP instruction -- including the one used for
-mfunction-return=thunk-extern.

This is a known missing mitigation for an AMD SLS issue.

Due to the whole branch-type-confusion thing, AMD CPUs can predict the
JMP as 'not-a-branch' and continue to the next instruction.

I'm sure Andrew has the proper name and CVE stashed away somewhere. IIRC
he even has a GCC bugzilla entry for it.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 2/4] x86/srso: Rename fam17 SRSO infrastructure to srso_fam17_*()
  2023-08-21 11:27 ` [PATCH 2/4] x86/srso: Rename fam17 SRSO infrastructure to srso_fam17_*() Andrew Cooper
@ 2023-09-13 13:15   ` Peter Zijlstra
  0 siblings, 0 replies; 20+ messages in thread
From: Peter Zijlstra @ 2023-09-13 13:15 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: LKML, x86, Borislav Petkov, Josh Poimboeuf, Babu Moger,
	David.Kaplan, Nikolay Borisov, gregkh, Thomas Gleixner

On Mon, Aug 21, 2023 at 12:27:21PM +0100, Andrew Cooper wrote:
> The naming is inconsistent.  Rename it to fam17 to state the microarchitecture
> it is applicable to, and to mirror the srso_fam19_*() change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> CC: x86@kernel.org
> CC: linux-kernel@vger.kernel.org
> CC: Borislav Petkov <bp@alien8.de>
> CC: Peter Zijlstra <peterz@infradead.org>
> CC: Josh Poimboeuf <jpoimboe@kernel.org>
> CC: Babu Moger <babu.moger@amd.com>
> CC: David.Kaplan@amd.com
> CC: Nikolay Borisov <nik.borisov@suse.com>
> CC: gregkh@linuxfoundation.org
> CC: Thomas Gleixner <tglx@linutronix.de>
> ---
>  arch/x86/include/asm/nospec-branch.h |  4 ++--
>  arch/x86/kernel/cpu/bugs.c           |  2 +-
>  arch/x86/kernel/vmlinux.lds.S        |  2 +-
>  arch/x86/lib/retpoline.S             | 32 ++++++++++++++--------------
>  4 files changed, 20 insertions(+), 20 deletions(-)
> 

Re your objtool woes:

> -SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
> +	.skip 64 - (srso_fam17_safe_ret - srso_fam17_untrain_ret), 0xcc
> +SYM_START(srso_fam17_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
>  	ANNOTATE_NOENDBR
>  	.byte 0x48, 0xb8
>  
>  /*
>   * This forces the function return instruction to speculate into a trap
> - * (UD2 in srso_return_thunk() below).  This RET will then mispredict
> + * (UD2 in srso_fam17_return_thunk() below).  This RET will then mispredict
>   * and execution will continue at the return site read from the top of
>   * the stack.
>   */
> -SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
> +SYM_INNER_LABEL(srso_fam17_safe_ret, SYM_L_GLOBAL)

Note that there is a mention of 'srso_safe_ret' in
tools/objtool/arch/x86/decode.c:arch_is_embedded_insn() which you
'forgot' to update.

>  	lea 8(%_ASM_SP), %_ASM_SP
>  	ret
>  	int3
>  	int3
>  	/* end of movabs */
>  	lfence
> -	call srso_safe_ret
> +	call srso_fam17_safe_ret
>  	ud2
> -SYM_CODE_END(srso_safe_ret)
> -SYM_FUNC_END(srso_untrain_ret)
> -__EXPORT_THUNK(srso_untrain_ret)
> +SYM_CODE_END(srso_fam17_safe_ret)
> +SYM_FUNC_END(srso_fam17_untrain_ret)
> +__EXPORT_THUNK(srso_fam17_untrain_ret)

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead
  2023-08-21 11:27 ` [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead Andrew Cooper
                     ` (2 preceding siblings ...)
  2023-08-22  7:23   ` kernel test robot
@ 2023-09-13 13:17   ` Peter Zijlstra
  3 siblings, 0 replies; 20+ messages in thread
From: Peter Zijlstra @ 2023-09-13 13:17 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: LKML, x86, Borislav Petkov, Josh Poimboeuf, Babu Moger,
	David.Kaplan, Nikolay Borisov, gregkh, Thomas Gleixner

On Mon, Aug 21, 2023 at 12:27:23PM +0100, Andrew Cooper wrote:

> 
>   vmlinux.o: warning: objtool: srso_fam17_return_thunk(): can't find starting instruction
> 

> @@ -288,26 +283,22 @@ SYM_START(srso_fam17_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
>   * and execution will continue at the return site read from the top of
>   * the stack.
>   */
> -SYM_INNER_LABEL(srso_fam17_safe_ret, SYM_L_GLOBAL)
> +SYM_INNER_LABEL(srso_fam17_return_thunk, SYM_L_GLOBAL)

This srso_safe_ret -> srso_fam17_safe_ret you forgot in the last patch,
is then here renamed yet again to srso_fam17_return_thunk.

And there is your objtool splat.

> +	UNWIND_HINT_FUNC
> +	ANNOTATE_NOENDBR
>  	lea 8(%_ASM_SP), %_ASM_SP
> +	ANNOTATE_UNRET_SAFE
>  	ret
>  	int3
>  	int3
>  	/* end of movabs */
>  	lfence
> -	call srso_fam17_safe_ret
> +	call srso_fam17_return_thunk
>  	ud2
> -SYM_CODE_END(srso_fam17_safe_ret)
> +SYM_CODE_END(srso_fam17_return_thunk)
>  SYM_FUNC_END(srso_fam17_untrain_ret)
>  __EXPORT_THUNK(srso_fam17_untrain_ret)
>  

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/4] x86/srso: Rename srso_alias_*() to srso_fam19_*()
  2023-08-21 11:27 ` [PATCH 1/4] x86/srso: Rename srso_alias_*() to srso_fam19_*() Andrew Cooper
@ 2023-09-13 13:46   ` Borislav Petkov
  2023-09-13 14:27     ` Andrew Cooper
  0 siblings, 1 reply; 20+ messages in thread
From: Borislav Petkov @ 2023-09-13 13:46 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: LKML, x86, Peter Zijlstra, Josh Poimboeuf, Babu Moger,
	David.Kaplan, Nikolay Borisov, gregkh, Thomas Gleixner

On Mon, Aug 21, 2023 at 12:27:20PM +0100, Andrew Cooper wrote:
> The 'alias' name name is an internal detail of how the logic works.  Rename it
> to state which microarchitecture is is applicable to.

Sorry, no. Hardcoding the family into some function is a backwards. The
moment you need to apply this to some other family, it becomes wrong.

And I prefer much more "srso" and "srso_alias".

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/4] x86/srso: Rename srso_alias_*() to srso_fam19_*()
  2023-09-13 13:46   ` Borislav Petkov
@ 2023-09-13 14:27     ` Andrew Cooper
  2023-09-13 14:46       ` Borislav Petkov
  0 siblings, 1 reply; 20+ messages in thread
From: Andrew Cooper @ 2023-09-13 14:27 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: LKML, x86, Peter Zijlstra, Josh Poimboeuf, Babu Moger,
	David.Kaplan, Nikolay Borisov, gregkh, Thomas Gleixner

On 13/09/2023 2:46 pm, Borislav Petkov wrote:
> On Mon, Aug 21, 2023 at 12:27:20PM +0100, Andrew Cooper wrote:
>> The 'alias' name name is an internal detail of how the logic works.  Rename it
>> to state which microarchitecture is is applicable to.
> Sorry, no. Hardcoding the family into some function is a backwards. The
> moment you need to apply this to some other family, it becomes wrong.
>
> And I prefer much more "srso" and "srso_alias".

You literally have one set of functions which is not safe to use on
anything other than fam17, and a different set of functions which is not
safe to use on anything other than fam19.  Neither are safe to use under
virt, which is an outstanding security vulnerability in the SRSO work.

Given the clustermess that is SRSO, it's not as if the fam1a BTB is
going to be reverted back to look like a fam19 one, so "different
families" isn't going to happen.  The most likely thing to happen is
that you'll have to invent a $FOO_different_alias when a 3rd BTB
structure is shown to have related problems.

I know you may like $FOO and $FOO_alias, but an alias infix on one of a
pair implies they're related when in fact they are not.  It takes a the
already-insanely-complicated logic and makes even harder to follow.

Naming is very important for clarity/understanding, and the current
naming here is doing it's damn hardest to make the logic impossible to
follow, edit, and crucially, fix.

~Andrew

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/4] x86/srso: Rename srso_alias_*() to srso_fam19_*()
  2023-09-13 14:27     ` Andrew Cooper
@ 2023-09-13 14:46       ` Borislav Petkov
  0 siblings, 0 replies; 20+ messages in thread
From: Borislav Petkov @ 2023-09-13 14:46 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: LKML, x86, Peter Zijlstra, Josh Poimboeuf, Babu Moger,
	David.Kaplan, Nikolay Borisov, gregkh, Thomas Gleixner

On Wed, Sep 13, 2023 at 03:27:00PM +0100, Andrew Cooper wrote:
> I know you may like $FOO and $FOO_alias, but an alias infix on one of a
> pair implies they're related when in fact they are not.  It takes a the
> already-insanely-complicated logic and makes even harder to follow.

Maybe, but adding the family into the function name doesn't make it more
clear. After all, the family is just a number.

I'm open to other suggestions which make this logic easier to follow
- although this is as confusing as it gets already and I doubt that
calling it whatever would make it more clear...

In any case, the family number ain't the right one.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2023-09-13 14:46 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-08-21 11:27 [PATCH 0/4] x86/srso: Reduce overhead of the mitigation Andrew Cooper
2023-08-21 11:27 ` [PATCH 1/4] x86/srso: Rename srso_alias_*() to srso_fam19_*() Andrew Cooper
2023-09-13 13:46   ` Borislav Petkov
2023-09-13 14:27     ` Andrew Cooper
2023-09-13 14:46       ` Borislav Petkov
2023-08-21 11:27 ` [PATCH 2/4] x86/srso: Rename fam17 SRSO infrastructure to srso_fam17_*() Andrew Cooper
2023-09-13 13:15   ` Peter Zijlstra
2023-08-21 11:27 ` [PATCH RFC 3/4] x86/ret-thunk: Support CALL-ing to the ret-thunk Andrew Cooper
2023-08-21 11:27 ` [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead Andrew Cooper
2023-08-21 15:16   ` Josh Poimboeuf
2023-08-21 23:01     ` Andrew Cooper
2023-08-22  2:22       ` Josh Poimboeuf
2023-08-22  6:45         ` Nikolay Borisov
2023-08-22 22:18           ` Josh Poimboeuf
2023-08-23  6:08             ` Nikolay Borisov
2023-09-13 12:50             ` Peter Zijlstra
2023-08-21 21:19   ` kernel test robot
2023-08-22  7:23   ` kernel test robot
2023-09-13 13:17   ` Peter Zijlstra
  -- strict thread matches above, loose matches on Subject: below --
2023-08-21 18:06 kernel test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.