From: Peter Zijlstra <peterz@infradead.org>
To: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Alexandre Chartre <alexandre.chartre@oracle.com>,
x86@kernel.org, linux-kernel@vger.kernel.org,
jthierry@redhat.com, tglx@linutronix.de
Subject: Re: [PATCH V2 9/9] x86/speculation: Remove all ANNOTATE_NOSPEC_ALTERNATIVE directives
Date: Wed, 8 Apr 2020 23:35:08 +0200 [thread overview]
Message-ID: <20200408213508.GA4496@worktop.programming.kicks-ass.net> (raw)
In-Reply-To: <20200407172739.GI20730@hirez.programming.kicks-ass.net>
On Tue, Apr 07, 2020 at 07:27:39PM +0200, Peter Zijlstra wrote:
> On Tue, Apr 07, 2020 at 11:28:38AM -0500, Josh Poimboeuf wrote:
> > Again, we should warn on stack changes inside alternatives, and then
> > look at converting RSB and retpolines to use static branches so they
> > have deterministic stacks.
>
> I don't think we need static brancher, we should just out-of-line the
> whole thing.
>
> Let me sort this CFI error Thomas is getting and then I'll attempt a
> patch along the lines I outlined in earlier emails.
Something like so.. seems to build and boot.
---
From: Peter Zijlstra (Intel) <peterz@infradead.org>
Subject: x86: Out-of-line retpoline
Since GCC generated code already uses out-of-line retpolines and objtool
has trouble with retpolines in alternatives, out-of-line them entirely.
This will enable objtool (once it's been taught a few more tricks) to
generate valid ORC data for the out-of-line copies, which means we can
correctly and reliably unwind through a retpoline.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/crypto/aesni-intel_asm.S | 4 +--
arch/x86/crypto/camellia-aesni-avx-asm_64.S | 2 +-
arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 2 +-
arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 26 ++++++++---------
arch/x86/entry/entry_32.S | 6 ++--
arch/x86/entry/entry_64.S | 2 +-
arch/x86/include/asm/asm-prototypes.h | 8 ++++--
arch/x86/include/asm/nospec-branch.h | 42 ++++------------------------
arch/x86/kernel/ftrace_32.S | 2 +-
arch/x86/kernel/ftrace_64.S | 4 +--
arch/x86/lib/checksum_32.S | 4 +--
arch/x86/lib/retpoline.S | 27 +++++++++++++++---
arch/x86/platform/efi/efi_stub_64.S | 2 +-
13 files changed, 62 insertions(+), 69 deletions(-)
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index cad6e1bfa7d5..54e7d15dbd0d 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -2758,7 +2758,7 @@ SYM_FUNC_START(aesni_xts_crypt8)
pxor INC, STATE4
movdqu IV, 0x30(OUTP)
- CALL_NOSPEC %r11
+ CALL_NOSPEC r11
movdqu 0x00(OUTP), INC
pxor INC, STATE1
@@ -2803,7 +2803,7 @@ SYM_FUNC_START(aesni_xts_crypt8)
_aesni_gf128mul_x_ble()
movups IV, (IVP)
- CALL_NOSPEC %r11
+ CALL_NOSPEC r11
movdqu 0x40(OUTP), INC
pxor INC, STATE1
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index d01ddd73de65..ecc0a9a905c4 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -1228,7 +1228,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way)
vpxor 14 * 16(%rax), %xmm15, %xmm14;
vpxor 15 * 16(%rax), %xmm15, %xmm15;
- CALL_NOSPEC %r9;
+ CALL_NOSPEC r9;
addq $(16 * 16), %rsp;
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 563ef6e83cdd..0907243c501c 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -1339,7 +1339,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way)
vpxor 14 * 32(%rax), %ymm15, %ymm14;
vpxor 15 * 32(%rax), %ymm15, %ymm15;
- CALL_NOSPEC %r9;
+ CALL_NOSPEC r9;
addq $(16 * 32), %rsp;
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 0e6690e3618c..8501ec4532f4 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -75,7 +75,7 @@
.text
SYM_FUNC_START(crc_pcl)
-#define bufp %rdi
+#define bufp rdi
#define bufp_dw %edi
#define bufp_w %di
#define bufp_b %dil
@@ -105,9 +105,9 @@ SYM_FUNC_START(crc_pcl)
## 1) ALIGN:
################################################################
- mov bufp, bufptmp # rdi = *buf
- neg bufp
- and $7, bufp # calculate the unalignment amount of
+ mov %bufp, bufptmp # rdi = *buf
+ neg %bufp
+ and $7, %bufp # calculate the unalignment amount of
# the address
je proc_block # Skip if aligned
@@ -123,13 +123,13 @@ SYM_FUNC_START(crc_pcl)
do_align:
#### Calculate CRC of unaligned bytes of the buffer (if any)
movq (bufptmp), tmp # load a quadward from the buffer
- add bufp, bufptmp # align buffer pointer for quadword
+ add %bufp, bufptmp # align buffer pointer for quadword
# processing
- sub bufp, len # update buffer length
+ sub %bufp, len # update buffer length
align_loop:
crc32b %bl, crc_init_dw # compute crc32 of 1-byte
shr $8, tmp # get next byte
- dec bufp
+ dec %bufp
jne align_loop
proc_block:
@@ -169,10 +169,10 @@ continue_block:
xor crc2, crc2
## branch into array
- lea jump_table(%rip), bufp
- movzxw (bufp, %rax, 2), len
- lea crc_array(%rip), bufp
- lea (bufp, len, 1), bufp
+ lea jump_table(%rip), %bufp
+ movzxw (%bufp, %rax, 2), len
+ lea crc_array(%rip), %bufp
+ lea (%bufp, len, 1), %bufp
JMP_NOSPEC bufp
################################################################
@@ -218,9 +218,9 @@ LABEL crc_ %i
## 4) Combine three results:
################################################################
- lea (K_table-8)(%rip), bufp # first entry is for idx 1
+ lea (K_table-8)(%rip), %bufp # first entry is for idx 1
shlq $3, %rax # rax *= 8
- pmovzxdq (bufp,%rax), %xmm0 # 2 consts: K1:K2
+ pmovzxdq (%bufp,%rax), %xmm0 # 2 consts: K1:K2
leal (%eax,%eax,2), %eax # rax *= 3 (total *24)
subq %rax, tmp # tmp -= rax*24
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index b67bae7091d7..7e7ffb7a5147 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -816,7 +816,7 @@ SYM_CODE_START(ret_from_fork)
/* kernel thread */
1: movl %edi, %eax
- CALL_NOSPEC %ebx
+ CALL_NOSPEC ebx
/*
* A kernel thread is allowed to return here after successfully
* calling do_execve(). Exit to userspace to complete the execve()
@@ -1501,7 +1501,7 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2)
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
- CALL_NOSPEC %edi
+ CALL_NOSPEC edi
jmp ret_from_exception
SYM_CODE_END(common_exception_read_cr2)
@@ -1522,7 +1522,7 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exception)
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
- CALL_NOSPEC %edi
+ CALL_NOSPEC edi
jmp ret_from_exception
SYM_CODE_END(common_exception)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 0e9504fabe52..168b798913bc 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -349,7 +349,7 @@ SYM_CODE_START(ret_from_fork)
/* kernel thread */
UNWIND_HINT_EMPTY
movq %r12, %rdi
- CALL_NOSPEC %rbx
+ CALL_NOSPEC rbx
/*
* A kernel thread is allowed to return here after successfully
* calling do_execve(). Exit to userspace to complete the execve()
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index ce92c4acc913..a75195f159cc 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -18,9 +18,13 @@ extern void cmpxchg8b_emu(void);
#ifdef CONFIG_RETPOLINE
#ifdef CONFIG_X86_32
-#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
+#define INDIRECT_THUNK(reg) \
+ extern asmlinkage void __x86_retpoline_e ## reg(void); \
+ extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
#else
-#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
+#define INDIRECT_THUNK(reg) \
+ extern asmlinkage void __x86_retpoline_r ## reg(void); \
+ extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
INDIRECT_THUNK(8)
INDIRECT_THUNK(9)
INDIRECT_THUNK(10)
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 07e95dcb40ad..0a3ceab5e0ec 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -76,34 +76,6 @@
.popsection
.endm
-/*
- * These are the bare retpoline primitives for indirect jmp and call.
- * Do not use these directly; they only exist to make the ALTERNATIVE
- * invocation below less ugly.
- */
-.macro RETPOLINE_JMP reg:req
- call .Ldo_rop_\@
-.Lspec_trap_\@:
- pause
- lfence
- jmp .Lspec_trap_\@
-.Ldo_rop_\@:
- mov \reg, (%_ASM_SP)
- ret
-.endm
-
-/*
- * This is a wrapper around RETPOLINE_JMP so the called function in reg
- * returns to the instruction after the macro.
- */
-.macro RETPOLINE_CALL reg:req
- jmp .Ldo_call_\@
-.Ldo_retpoline_jmp_\@:
- RETPOLINE_JMP \reg
-.Ldo_call_\@:
- call .Ldo_retpoline_jmp_\@
-.endm
-
/*
* JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
* indirect jmp/call which may be susceptible to the Spectre variant 2
@@ -111,10 +83,9 @@
*/
.macro JMP_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
- ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \
- __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
+ __stringify(jmp __x86_retpoline_\()\reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD
#else
jmp *\reg
#endif
@@ -122,10 +93,9 @@
.macro CALL_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
- ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \
- __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \
+ __stringify(call __x86_retpoline_\()\reg), X86_FEATURE_RETPOLINE,\
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD
#else
call *\reg
#endif
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index e8a9f8370112..e405fe1a8bf4 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -189,5 +189,5 @@ return_to_handler:
movl %eax, %ecx
popl %edx
popl %eax
- JMP_NOSPEC %ecx
+ JMP_NOSPEC ecx
#endif
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 369e61faacfe..2f2b5702e6cf 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -303,7 +303,7 @@ trace:
* function tracing is enabled.
*/
movq ftrace_trace_function, %r8
- CALL_NOSPEC %r8
+ CALL_NOSPEC r8
restore_mcount_regs
jmp fgraph_trace
@@ -340,6 +340,6 @@ SYM_CODE_START(return_to_handler)
movq 8(%rsp), %rdx
movq (%rsp), %rax
addq $24, %rsp
- JMP_NOSPEC %rdi
+ JMP_NOSPEC rdi
SYM_CODE_END(return_to_handler)
#endif
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 4742e8fa7ee7..d1d768912368 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -153,7 +153,7 @@ SYM_FUNC_START(csum_partial)
negl %ebx
lea 45f(%ebx,%ebx,2), %ebx
testl %esi, %esi
- JMP_NOSPEC %ebx
+ JMP_NOSPEC ebx
# Handle 2-byte-aligned regions
20: addw (%esi), %ax
@@ -436,7 +436,7 @@ SYM_FUNC_START(csum_partial_copy_generic)
andl $-32,%edx
lea 3f(%ebx,%ebx), %ebx
testl %esi, %esi
- JMP_NOSPEC %ebx
+ JMP_NOSPEC ebx
1: addl $64,%esi
addl $64,%edi
SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 363ec132df7e..d4aef0c856a6 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -8,14 +8,31 @@
#include <asm/export.h>
#include <asm/nospec-branch.h>
+/*
+ * This is the bare retpoline primitive.
+ */
+.macro RETPOLINE_JMP reg:req
+ call .Ldo_rop_\@
+.Lspec_trap_\@:
+ pause
+ lfence
+ jmp .Lspec_trap_\@
+.Ldo_rop_\@:
+ mov \reg, (%_ASM_SP)
+ ret
+.endm
+
.macro THUNK reg
.section .text.__x86.indirect_thunk
+SYM_FUNC_START(__x86_retpoline_\reg)
+ RETPOLINE_JMP %\reg
+SYM_FUNC_END(__x86_retpoline_\reg)
+
SYM_FUNC_START(__x86_indirect_thunk_\reg)
- CFI_STARTPROC
- JMP_NOSPEC %\reg
- CFI_ENDPROC
+ JMP_NOSPEC \reg
SYM_FUNC_END(__x86_indirect_thunk_\reg)
+
.endm
/*
@@ -26,7 +43,9 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
* the simple and nasty way...
*/
#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
-#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+#define EXPORT_THUNK(reg) \
+ __EXPORT_THUNK(__x86_retpoline_ ## reg); \
+ __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
GENERATE_THUNK(_ASM_AX)
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 15da118f04f0..90380a17ab23 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -21,7 +21,7 @@ SYM_FUNC_START(__efi_call)
mov %r8, %r9
mov %rcx, %r8
mov %rsi, %rcx
- CALL_NOSPEC %rdi
+ CALL_NOSPEC rdi
leave
ret
SYM_FUNC_END(__efi_call)
next prev parent reply other threads:[~2020-04-08 21:35 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-04-07 7:31 [PATCH V2 0/9] objtool changes to remove all ANNOTATE_NOSPEC_ALTERNATIVE Alexandre Chartre
2020-04-07 7:31 ` [PATCH V2 1/9] objtool: Introduce HINT_RET_OFFSET Alexandre Chartre
2020-04-07 12:53 ` Peter Zijlstra
2020-04-07 13:17 ` Alexandre Chartre
2020-04-07 7:31 ` [PATCH V2 2/9] objtool: UNWIND_HINT_RET_OFFSET should not check registers Alexandre Chartre
2020-05-01 18:22 ` [tip: objtool/core] " tip-bot2 for Alexandre Chartre
2020-04-07 7:31 ` [PATCH V2 3/9] objtool: is_fentry_call() crashes if call has no destination Alexandre Chartre
2020-04-07 7:31 ` [PATCH V2 4/9] objtool: Allow branches within the same alternative Alexandre Chartre
2020-04-07 7:31 ` [PATCH V2 5/9] objtool: Add support for intra-function calls Alexandre Chartre
2020-04-07 13:07 ` Peter Zijlstra
2020-04-07 13:28 ` Alexandre Chartre
2020-04-08 14:06 ` Alexandre Chartre
2020-04-08 14:19 ` Julien Thierry
2020-04-08 16:03 ` Alexandre Chartre
2020-04-08 16:04 ` Julien Thierry
2020-04-08 17:06 ` Alexandre Chartre
2020-04-08 17:07 ` Julien Thierry
2020-04-07 7:31 ` [PATCH V2 6/9] x86/speculation: Change __FILL_RETURN_BUFFER to work with objtool Alexandre Chartre
2020-04-07 13:27 ` Josh Poimboeuf
2020-04-07 7:31 ` [PATCH V2 7/9] x86/speculation: Annotate intra-function calls Alexandre Chartre
2020-04-07 7:31 ` [PATCH V2 8/9] x86/speculation: Add unwind hint to trampoline return Alexandre Chartre
2020-04-07 7:31 ` [PATCH V2 9/9] x86/speculation: Remove all ANNOTATE_NOSPEC_ALTERNATIVE directives Alexandre Chartre
2020-04-07 13:28 ` Peter Zijlstra
2020-04-07 13:34 ` Josh Poimboeuf
2020-04-07 14:32 ` Alexandre Chartre
2020-04-07 16:18 ` Alexandre Chartre
2020-04-07 16:28 ` Josh Poimboeuf
2020-04-07 17:01 ` Alexandre Chartre
2020-04-07 17:26 ` Peter Zijlstra
2020-04-07 17:27 ` Peter Zijlstra
2020-04-08 21:35 ` Peter Zijlstra [this message]
2020-04-09 8:18 ` Alexandre Chartre
2020-04-09 10:34 ` Peter Zijlstra
2020-04-09 10:40 ` Peter Zijlstra
2020-04-07 16:41 ` Peter Zijlstra
2020-04-07 17:04 ` Alexandre Chartre
2020-04-07 13:52 ` Peter Zijlstra
2020-04-07 13:59 ` Peter Zijlstra
2020-04-07 13:35 ` [PATCH V2 0/9] objtool changes to remove all ANNOTATE_NOSPEC_ALTERNATIVE Josh Poimboeuf
2020-04-07 14:02 ` Alexandre Chartre
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200408213508.GA4496@worktop.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=alexandre.chartre@oracle.com \
--cc=jpoimboe@redhat.com \
--cc=jthierry@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.