* [PATCH 0/8] x86: kCFI and IBT cleanups
@ 2024-11-05 11:39 Peter Zijlstra
2024-11-05 11:39 ` [PATCH 1/8] x86,kcfi: Fix EXPORT_SYMBOL vs kCFI Peter Zijlstra
` (7 more replies)
0 siblings, 8 replies; 18+ messages in thread
From: Peter Zijlstra @ 2024-11-05 11:39 UTC (permalink / raw)
To: x86
Cc: linux-kernel, peterz, alyssa.milburn, scott.d.constable, joao,
andrew.cooper3, jpoimboe, alexei.starovoitov, ebiggers,
samitolvanen, kees
Hi!
This is the 'safe' set of patches that cleans up a lot of the kCFI / IBT
things.
For now, I've dropped the IBT+ and FineIBT-BHI bits, we'll get back to those
once the dust settles.
Also available here:
git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git x86/ibt
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 1/8] x86,kcfi: Fix EXPORT_SYMBOL vs kCFI
2024-11-05 11:39 [PATCH 0/8] x86: kCFI and IBT cleanups Peter Zijlstra
@ 2024-11-05 11:39 ` Peter Zijlstra
2024-11-05 14:16 ` Christoph Hellwig
2024-11-05 11:39 ` [PATCH 2/8] x86/cfi: Clean up linkage Peter Zijlstra
` (6 subsequent siblings)
7 siblings, 1 reply; 18+ messages in thread
From: Peter Zijlstra @ 2024-11-05 11:39 UTC (permalink / raw)
To: x86
Cc: linux-kernel, peterz, alyssa.milburn, scott.d.constable, joao,
andrew.cooper3, jpoimboe, alexei.starovoitov, ebiggers,
samitolvanen, kees
The expectation is that all EXPORT'ed symbols are free to have their
address taken and called indirectly. The majority of the assembly
defined functions currently violate this expectation.
Make then all use SYM_TYPED_FUNC_START() in order to emit the proper
kCFI preamble.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/crypto/camellia-aesni-avx-asm_64.S | 7 ++++---
arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 1 +
arch/x86/crypto/camellia-x86_64-asm_64.S | 9 +++++----
arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 7 ++++---
arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 5 +++--
arch/x86/crypto/twofish-x86_64-asm_64.S | 5 +++--
arch/x86/entry/entry.S | 3 ++-
arch/x86/entry/entry_64.S | 5 +++--
arch/x86/lib/clear_page_64.S | 9 +++++----
arch/x86/lib/copy_page_64.S | 3 ++-
arch/x86/lib/copy_user_64.S | 3 ++-
arch/x86/lib/copy_user_uncached_64.S | 3 ++-
arch/x86/lib/getuser.S | 17 +++++++++--------
arch/x86/lib/hweight.S | 5 +++--
arch/x86/lib/memmove_64.S | 3 ++-
arch/x86/lib/memset_64.S | 3 ++-
arch/x86/lib/msr-reg.S | 3 ++-
arch/x86/lib/putuser.S | 17 +++++++++--------
18 files changed, 63 insertions(+), 45 deletions(-)
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -16,6 +16,7 @@
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/frame.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -882,7 +883,7 @@ SYM_FUNC_START_LOCAL(__camellia_dec_blk1
jmp .Ldec_max24;
SYM_FUNC_END(__camellia_dec_blk16)
-SYM_FUNC_START(camellia_ecb_enc_16way)
+SYM_TYPED_FUNC_START(camellia_ecb_enc_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -907,7 +908,7 @@ SYM_FUNC_START(camellia_ecb_enc_16way)
RET;
SYM_FUNC_END(camellia_ecb_enc_16way)
-SYM_FUNC_START(camellia_ecb_dec_16way)
+SYM_TYPED_FUNC_START(camellia_ecb_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -937,7 +938,7 @@ SYM_FUNC_START(camellia_ecb_dec_16way)
RET;
SYM_FUNC_END(camellia_ecb_dec_16way)
-SYM_FUNC_START(camellia_cbc_dec_16way)
+SYM_TYPED_FUNC_START(camellia_cbc_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -6,6 +6,7 @@
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/frame.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -6,6 +6,7 @@
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
.file "camellia-x86_64-asm_64.S"
.text
@@ -177,7 +178,7 @@
bswapq RAB0; \
movq RAB0, 4*2(RIO);
-SYM_FUNC_START(__camellia_enc_blk)
+SYM_TYPED_FUNC_START(__camellia_enc_blk)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -224,7 +225,7 @@ SYM_FUNC_START(__camellia_enc_blk)
RET;
SYM_FUNC_END(__camellia_enc_blk)
-SYM_FUNC_START(camellia_dec_blk)
+SYM_TYPED_FUNC_START(camellia_dec_blk)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -411,7 +412,7 @@ SYM_FUNC_END(camellia_dec_blk)
bswapq RAB1; \
movq RAB1, 12*2(RIO);
-SYM_FUNC_START(__camellia_enc_blk_2way)
+SYM_TYPED_FUNC_START(__camellia_enc_blk_2way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -460,7 +461,7 @@ SYM_FUNC_START(__camellia_enc_blk_2way)
RET;
SYM_FUNC_END(__camellia_enc_blk_2way)
-SYM_FUNC_START(camellia_dec_blk_2way)
+SYM_TYPED_FUNC_START(camellia_dec_blk_2way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -9,6 +9,7 @@
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/frame.h>
#include "glue_helper-asm-avx.S"
@@ -656,7 +657,7 @@ SYM_FUNC_START_LOCAL(__serpent_dec_blk8_
RET;
SYM_FUNC_END(__serpent_dec_blk8_avx)
-SYM_FUNC_START(serpent_ecb_enc_8way_avx)
+SYM_TYPED_FUNC_START(serpent_ecb_enc_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -674,7 +675,7 @@ SYM_FUNC_START(serpent_ecb_enc_8way_avx)
RET;
SYM_FUNC_END(serpent_ecb_enc_8way_avx)
-SYM_FUNC_START(serpent_ecb_dec_8way_avx)
+SYM_TYPED_FUNC_START(serpent_ecb_dec_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -692,7 +693,7 @@ SYM_FUNC_START(serpent_ecb_dec_8way_avx)
RET;
SYM_FUNC_END(serpent_ecb_dec_8way_avx)
-SYM_FUNC_START(serpent_cbc_dec_8way_avx)
+SYM_TYPED_FUNC_START(serpent_cbc_dec_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -6,6 +6,7 @@
*/
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
.file "twofish-x86_64-asm-3way.S"
.text
@@ -220,7 +221,7 @@
rorq $32, RAB2; \
outunpack3(mov, RIO, 2, RAB, 2);
-SYM_FUNC_START(__twofish_enc_blk_3way)
+SYM_TYPED_FUNC_START(__twofish_enc_blk_3way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -269,7 +270,7 @@ SYM_FUNC_START(__twofish_enc_blk_3way)
RET;
SYM_FUNC_END(__twofish_enc_blk_3way)
-SYM_FUNC_START(twofish_dec_blk_3way)
+SYM_TYPED_FUNC_START(twofish_dec_blk_3way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -8,6 +8,7 @@
.text
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/asm-offsets.h>
#define a_offset 0
@@ -202,7 +203,7 @@
xor %r8d, d ## D;\
ror $1, d ## D;
-SYM_FUNC_START(twofish_enc_blk)
+SYM_TYPED_FUNC_START(twofish_enc_blk)
pushq R1
/* %rdi contains the ctx address */
@@ -255,7 +256,7 @@ SYM_FUNC_START(twofish_enc_blk)
RET
SYM_FUNC_END(twofish_enc_blk)
-SYM_FUNC_START(twofish_dec_blk)
+SYM_TYPED_FUNC_START(twofish_dec_blk)
pushq R1
/* %rdi contains the ctx address */
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -5,6 +5,7 @@
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
#include <asm/segment.h>
@@ -16,7 +17,7 @@
.pushsection .noinstr.text, "ax"
-SYM_FUNC_START(entry_ibpb)
+SYM_TYPED_FUNC_START(entry_ibpb)
movl $MSR_IA32_PRED_CMD, %ecx
movl $PRED_CMD_IBPB, %eax
xorl %edx, %edx
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -20,6 +20,7 @@
*/
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
@@ -742,7 +743,7 @@ _ASM_NOKPROBE(common_interrupt_return)
*
* Is in entry.text as it shouldn't be instrumented.
*/
-SYM_FUNC_START(asm_load_gs_index)
+SYM_TYPED_FUNC_START(asm_load_gs_index)
FRAME_BEGIN
swapgs
.Lgs_change:
@@ -1526,7 +1527,7 @@ SYM_CODE_END(rewind_stack_and_make_dead)
* The alignment is for performance and not for safety, and may be safely
* refactored in the future if needed.
*/
-SYM_FUNC_START(clear_bhb_loop)
+SYM_TYPED_FUNC_START(clear_bhb_loop)
push %rbp
mov %rsp, %rbp
movl $5, %ecx
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/asm.h>
/*
@@ -14,7 +15,7 @@
* Zero a page.
* %rdi - page
*/
-SYM_FUNC_START(clear_page_rep)
+SYM_TYPED_FUNC_START(clear_page_rep)
movl $4096/8,%ecx
xorl %eax,%eax
rep stosq
@@ -22,7 +23,7 @@ SYM_FUNC_START(clear_page_rep)
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)
-SYM_FUNC_START(clear_page_orig)
+SYM_TYPED_FUNC_START(clear_page_orig)
xorl %eax,%eax
movl $4096/64,%ecx
.p2align 4
@@ -44,7 +45,7 @@ SYM_FUNC_START(clear_page_orig)
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)
-SYM_FUNC_START(clear_page_erms)
+SYM_TYPED_FUNC_START(clear_page_erms)
movl $4096,%ecx
xorl %eax,%eax
rep stosb
@@ -62,7 +63,7 @@ EXPORT_SYMBOL_GPL(clear_page_erms)
* Output:
* rcx: uncleared bytes or 0 if successful.
*/
-SYM_FUNC_START(rep_stos_alternative)
+SYM_TYPED_FUNC_START(rep_stos_alternative)
cmpq $64,%rcx
jae .Lunrolled
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -3,6 +3,7 @@
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
@@ -13,7 +14,7 @@
* prefetch distance based on SMP/UP.
*/
ALIGN
-SYM_FUNC_START(copy_page)
+SYM_TYPED_FUNC_START(copy_page)
ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
movl $4096/8, %ecx
rep movsq
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -8,6 +8,7 @@
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm.h>
@@ -29,7 +30,7 @@
* just a plain 'rep movs' on machines that have FSRM. But to make
* it simpler for us, we can clobber rsi/rdi and rax freely.
*/
-SYM_FUNC_START(rep_movs_alternative)
+SYM_TYPED_FUNC_START(rep_movs_alternative)
cmpq $64,%rcx
jae .Llarge
--- a/arch/x86/lib/copy_user_uncached_64.S
+++ b/arch/x86/lib/copy_user_uncached_64.S
@@ -5,6 +5,7 @@
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/asm.h>
/*
@@ -26,7 +27,7 @@
* Output:
* rax uncopied bytes or 0 if successful.
*/
-SYM_FUNC_START(__copy_user_nocache)
+SYM_TYPED_FUNC_START(__copy_user_nocache)
/* If destination is not 7-byte aligned, we'll have to align it */
testb $7,%dil
jne .Lalign
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -28,6 +28,7 @@
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/page_types.h>
#include <asm/errno.h>
#include <asm/asm-offsets.h>
@@ -62,7 +63,7 @@
.text
-SYM_FUNC_START(__get_user_1)
+SYM_TYPED_FUNC_START(__get_user_1)
check_range size=1
ASM_STAC
UACCESS movzbl (%_ASM_AX),%edx
@@ -72,7 +73,7 @@ SYM_FUNC_START(__get_user_1)
SYM_FUNC_END(__get_user_1)
EXPORT_SYMBOL(__get_user_1)
-SYM_FUNC_START(__get_user_2)
+SYM_TYPED_FUNC_START(__get_user_2)
check_range size=2
ASM_STAC
UACCESS movzwl (%_ASM_AX),%edx
@@ -82,7 +83,7 @@ SYM_FUNC_START(__get_user_2)
SYM_FUNC_END(__get_user_2)
EXPORT_SYMBOL(__get_user_2)
-SYM_FUNC_START(__get_user_4)
+SYM_TYPED_FUNC_START(__get_user_4)
check_range size=4
ASM_STAC
UACCESS movl (%_ASM_AX),%edx
@@ -92,7 +93,7 @@ SYM_FUNC_START(__get_user_4)
SYM_FUNC_END(__get_user_4)
EXPORT_SYMBOL(__get_user_4)
-SYM_FUNC_START(__get_user_8)
+SYM_TYPED_FUNC_START(__get_user_8)
#ifndef CONFIG_X86_64
xor %ecx,%ecx
#endif
@@ -111,7 +112,7 @@ SYM_FUNC_END(__get_user_8)
EXPORT_SYMBOL(__get_user_8)
/* .. and the same for __get_user, just without the range checks */
-SYM_FUNC_START(__get_user_nocheck_1)
+SYM_TYPED_FUNC_START(__get_user_nocheck_1)
ASM_STAC
ASM_BARRIER_NOSPEC
UACCESS movzbl (%_ASM_AX),%edx
@@ -121,7 +122,7 @@ SYM_FUNC_START(__get_user_nocheck_1)
SYM_FUNC_END(__get_user_nocheck_1)
EXPORT_SYMBOL(__get_user_nocheck_1)
-SYM_FUNC_START(__get_user_nocheck_2)
+SYM_TYPED_FUNC_START(__get_user_nocheck_2)
ASM_STAC
ASM_BARRIER_NOSPEC
UACCESS movzwl (%_ASM_AX),%edx
@@ -131,7 +132,7 @@ SYM_FUNC_START(__get_user_nocheck_2)
SYM_FUNC_END(__get_user_nocheck_2)
EXPORT_SYMBOL(__get_user_nocheck_2)
-SYM_FUNC_START(__get_user_nocheck_4)
+SYM_TYPED_FUNC_START(__get_user_nocheck_4)
ASM_STAC
ASM_BARRIER_NOSPEC
UACCESS movl (%_ASM_AX),%edx
@@ -141,7 +142,7 @@ SYM_FUNC_START(__get_user_nocheck_4)
SYM_FUNC_END(__get_user_nocheck_4)
EXPORT_SYMBOL(__get_user_nocheck_4)
-SYM_FUNC_START(__get_user_nocheck_8)
+SYM_TYPED_FUNC_START(__get_user_nocheck_8)
ASM_STAC
ASM_BARRIER_NOSPEC
#ifdef CONFIG_X86_64
--- a/arch/x86/lib/hweight.S
+++ b/arch/x86/lib/hweight.S
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/asm.h>
@@ -8,7 +9,7 @@
* unsigned int __sw_hweight32(unsigned int w)
* %rdi: w
*/
-SYM_FUNC_START(__sw_hweight32)
+SYM_TYPED_FUNC_START(__sw_hweight32)
#ifdef CONFIG_X86_64
movl %edi, %eax # w
@@ -41,7 +42,7 @@ EXPORT_SYMBOL(__sw_hweight32)
* on top of __arch_hweight32():
*/
#ifdef CONFIG_X86_64
-SYM_FUNC_START(__sw_hweight64)
+SYM_TYPED_FUNC_START(__sw_hweight64)
pushq %rdi
pushq %rdx
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -8,6 +8,7 @@
*/
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
@@ -26,7 +27,7 @@
* Output:
* rax: dest
*/
-SYM_FUNC_START(__memmove)
+SYM_TYPED_FUNC_START(__memmove)
mov %rdi, %rax
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -3,6 +3,7 @@
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
@@ -28,7 +29,7 @@
* only for the return value that is the same as the source input,
* which the compiler could/should do much better anyway.
*/
-SYM_FUNC_START(__memset)
+SYM_TYPED_FUNC_START(__memset)
ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS
movq %rdi,%r9
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <linux/errno.h>
+#include <linux/cfi_types.h>
#include <asm/asm.h>
#include <asm/msr.h>
@@ -12,7 +13,7 @@
*
*/
.macro op_safe_regs op
-SYM_FUNC_START(\op\()_safe_regs)
+SYM_TYPED_FUNC_START(\op\()_safe_regs)
pushq %rbx
pushq %r12
movq %rdi, %r10 /* Save pointer */
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -13,6 +13,7 @@
*/
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/thread_info.h>
#include <asm/errno.h>
#include <asm/asm.h>
@@ -44,7 +45,7 @@
.endm
.text
-SYM_FUNC_START(__put_user_1)
+SYM_TYPED_FUNC_START(__put_user_1)
check_range size=1
ASM_STAC
1: movb %al,(%_ASM_CX)
@@ -54,7 +55,7 @@ SYM_FUNC_START(__put_user_1)
SYM_FUNC_END(__put_user_1)
EXPORT_SYMBOL(__put_user_1)
-SYM_FUNC_START(__put_user_nocheck_1)
+SYM_TYPED_FUNC_START(__put_user_nocheck_1)
ASM_STAC
2: movb %al,(%_ASM_CX)
xor %ecx,%ecx
@@ -63,7 +64,7 @@ SYM_FUNC_START(__put_user_nocheck_1)
SYM_FUNC_END(__put_user_nocheck_1)
EXPORT_SYMBOL(__put_user_nocheck_1)
-SYM_FUNC_START(__put_user_2)
+SYM_TYPED_FUNC_START(__put_user_2)
check_range size=2
ASM_STAC
3: movw %ax,(%_ASM_CX)
@@ -73,7 +74,7 @@ SYM_FUNC_START(__put_user_2)
SYM_FUNC_END(__put_user_2)
EXPORT_SYMBOL(__put_user_2)
-SYM_FUNC_START(__put_user_nocheck_2)
+SYM_TYPED_FUNC_START(__put_user_nocheck_2)
ASM_STAC
4: movw %ax,(%_ASM_CX)
xor %ecx,%ecx
@@ -82,7 +83,7 @@ SYM_FUNC_START(__put_user_nocheck_2)
SYM_FUNC_END(__put_user_nocheck_2)
EXPORT_SYMBOL(__put_user_nocheck_2)
-SYM_FUNC_START(__put_user_4)
+SYM_TYPED_FUNC_START(__put_user_4)
check_range size=4
ASM_STAC
5: movl %eax,(%_ASM_CX)
@@ -92,7 +93,7 @@ SYM_FUNC_START(__put_user_4)
SYM_FUNC_END(__put_user_4)
EXPORT_SYMBOL(__put_user_4)
-SYM_FUNC_START(__put_user_nocheck_4)
+SYM_TYPED_FUNC_START(__put_user_nocheck_4)
ASM_STAC
6: movl %eax,(%_ASM_CX)
xor %ecx,%ecx
@@ -101,7 +102,7 @@ SYM_FUNC_START(__put_user_nocheck_4)
SYM_FUNC_END(__put_user_nocheck_4)
EXPORT_SYMBOL(__put_user_nocheck_4)
-SYM_FUNC_START(__put_user_8)
+SYM_TYPED_FUNC_START(__put_user_8)
check_range size=8
ASM_STAC
7: mov %_ASM_AX,(%_ASM_CX)
@@ -114,7 +115,7 @@ SYM_FUNC_START(__put_user_8)
SYM_FUNC_END(__put_user_8)
EXPORT_SYMBOL(__put_user_8)
-SYM_FUNC_START(__put_user_nocheck_8)
+SYM_TYPED_FUNC_START(__put_user_nocheck_8)
ASM_STAC
9: mov %_ASM_AX,(%_ASM_CX)
#ifdef CONFIG_X86_32
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 2/8] x86/cfi: Clean up linkage
2024-11-05 11:39 [PATCH 0/8] x86: kCFI and IBT cleanups Peter Zijlstra
2024-11-05 11:39 ` [PATCH 1/8] x86,kcfi: Fix EXPORT_SYMBOL vs kCFI Peter Zijlstra
@ 2024-11-05 11:39 ` Peter Zijlstra
2024-11-05 11:39 ` [PATCH 3/8] x86/boot: Mark start_secondary() with __noendbr Peter Zijlstra
` (5 subsequent siblings)
7 siblings, 0 replies; 18+ messages in thread
From: Peter Zijlstra @ 2024-11-05 11:39 UTC (permalink / raw)
To: x86
Cc: linux-kernel, peterz, alyssa.milburn, scott.d.constable, joao,
andrew.cooper3, jpoimboe, alexei.starovoitov, ebiggers,
samitolvanen, kees
With the introduction of kCFI the addition of ENDBR to
SYM_FUNC_START* no longer suffices to make the function indirectly
callable. This now requires the use of SYM_TYPED_FUNC_START.
As such, remove the implicit ENDBR from SYM_FUNC_START* and add some
explicit annotations to fix things up again.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/crypto/aesni-intel_asm.S | 2 ++
arch/x86/entry/calling.h | 1 +
arch/x86/entry/entry_64.S | 1 +
arch/x86/entry/entry_64_fred.S | 1 +
arch/x86/include/asm/linkage.h | 18 ++++++------------
arch/x86/include/asm/paravirt_types.h | 12 +++++++++++-
arch/x86/include/asm/special_insns.h | 4 ++--
arch/x86/kernel/acpi/madt_playdead.S | 1 +
arch/x86/kernel/acpi/wakeup_64.S | 1 +
arch/x86/kernel/ftrace_64.S | 5 +++++
arch/x86/kernel/irqflags.S | 1 +
arch/x86/kernel/paravirt.c | 14 ++++++++++++--
arch/x86/lib/retpoline.S | 1 +
arch/x86/mm/mem_encrypt_boot.S | 1 +
arch/x86/power/hibernate_asm_64.S | 2 ++
arch/x86/xen/xen-asm.S | 5 +++++
16 files changed, 53 insertions(+), 17 deletions(-)
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -17,6 +17,7 @@
*/
#include <linux/linkage.h>
+#include <linux/objtool.h>
#include <asm/frame.h>
#define STATE1 %xmm0
@@ -1071,6 +1072,7 @@ SYM_FUNC_END(_aesni_inc)
* size_t len, u8 *iv)
*/
SYM_FUNC_START(aesni_ctr_enc)
+ ANNOTATE_NOENDBR
FRAME_BEGIN
cmp $16, LEN
jb .Lctr_enc_just_ret
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -431,6 +431,7 @@ For 32-bit we have the following convent
/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
.macro THUNK name, func
SYM_FUNC_START(\name)
+ ANNOTATE_NOENDBR
pushq %rbp
movq %rsp, %rbp
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -176,6 +176,7 @@ SYM_CODE_END(entry_SYSCALL_64)
*/
.pushsection .text, "ax"
SYM_FUNC_START(__switch_to_asm)
+ ANNOTATE_NOENDBR
/*
* Save callee-saved registers
* This must match the order in inactive_task_frame
--- a/arch/x86/entry/entry_64_fred.S
+++ b/arch/x86/entry/entry_64_fred.S
@@ -58,6 +58,7 @@ SYM_CODE_END(asm_fred_entrypoint_kernel)
#if IS_ENABLED(CONFIG_KVM_INTEL)
SYM_FUNC_START(asm_fred_entry_from_kvm)
+ ANNOTATE_NOENDBR
push %rbp
mov %rsp, %rbp
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -119,33 +119,27 @@
/* SYM_FUNC_START -- use for global functions */
#define SYM_FUNC_START(name) \
- SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \
- ENDBR
+ SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN)
/* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */
#define SYM_FUNC_START_NOALIGN(name) \
- SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \
- ENDBR
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE)
/* SYM_FUNC_START_LOCAL -- use for local functions */
#define SYM_FUNC_START_LOCAL(name) \
- SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN) \
- ENDBR
+ SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN)
/* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */
#define SYM_FUNC_START_LOCAL_NOALIGN(name) \
- SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \
- ENDBR
+ SYM_START(name, SYM_L_LOCAL, SYM_A_NONE)
/* SYM_FUNC_START_WEAK -- use for weak functions */
#define SYM_FUNC_START_WEAK(name) \
- SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN) \
- ENDBR
+ SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN)
/* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */
#define SYM_FUNC_START_WEAK_NOALIGN(name) \
- SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \
- ENDBR
+ SYM_START(name, SYM_L_WEAK, SYM_A_NONE)
#endif /* _ASM_X86_LINKAGE_H */
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -246,7 +246,17 @@ extern struct paravirt_patch_template pv
int paravirt_disable_iospace(void);
-/* This generates an indirect call based on the operation type number. */
+/*
+ * This generates an indirect call based on the operation type number.
+ *
+ * Since alternatives run after enabling CET/IBT -- the latter setting/clearing
+ * capabilities and the former requiring all capabilities being finalized --
+ * these indirect calls are subject to IBT and the paravirt stubs should have
+ * ENDBR on.
+ *
+ * OTOH since this is effectively a __nocfi indirect call, the paravirt stubs
+ * don't need to bother with CFI prefixes.
+ */
#define PARAVIRT_CALL \
ANNOTATE_RETPOLINE_SAFE \
"call *%[paravirt_opptr];"
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -42,14 +42,14 @@ static __always_inline void native_write
asm volatile("mov %0,%%cr2": : "r" (val) : "memory");
}
-static inline unsigned long __native_read_cr3(void)
+static __always_inline unsigned long __native_read_cr3(void)
{
unsigned long val;
asm volatile("mov %%cr3,%0\n\t" : "=r" (val) : __FORCE_ORDER);
return val;
}
-static inline void native_write_cr3(unsigned long val)
+static __always_inline void native_write_cr3(unsigned long val)
{
asm volatile("mov %0,%%cr3": : "r" (val) : "memory");
}
--- a/arch/x86/kernel/acpi/madt_playdead.S
+++ b/arch/x86/kernel/acpi/madt_playdead.S
@@ -14,6 +14,7 @@
* rsi: PGD of the identity mapping
*/
SYM_FUNC_START(asm_acpi_mp_play_dead)
+ ANNOTATE_NOENDBR
/* Turn off global entries. Following CR3 write will flush them. */
movq %cr4, %rdx
andq $~(X86_CR4_PGE), %rdx
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -17,6 +17,7 @@
* Hooray, we are in Long 64-bit mode (but still running in low memory)
*/
SYM_FUNC_START(wakeup_long64)
+ ANNOTATE_NOENDBR
movq saved_magic(%rip), %rax
movq $0x123456789abcdef0, %rdx
cmpq %rdx, %rax
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -146,12 +146,14 @@ SYM_FUNC_END(ftrace_stub_graph)
#ifdef CONFIG_DYNAMIC_FTRACE
SYM_FUNC_START(__fentry__)
+ ANNOTATE_NOENDBR
CALL_DEPTH_ACCOUNT
RET
SYM_FUNC_END(__fentry__)
EXPORT_SYMBOL(__fentry__)
SYM_FUNC_START(ftrace_caller)
+ ANNOTATE_NOENDBR
/* save_mcount_regs fills in first two parameters */
save_mcount_regs
@@ -197,6 +199,7 @@ SYM_FUNC_END(ftrace_caller);
STACK_FRAME_NON_STANDARD_FP(ftrace_caller)
SYM_FUNC_START(ftrace_regs_caller)
+ ANNOTATE_NOENDBR
/* Save the current flags before any operations that can change them */
pushfq
@@ -310,6 +313,7 @@ SYM_FUNC_END(ftrace_regs_caller)
STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
SYM_FUNC_START(ftrace_stub_direct_tramp)
+ ANNOTATE_NOENDBR
CALL_DEPTH_ACCOUNT
RET
SYM_FUNC_END(ftrace_stub_direct_tramp)
@@ -317,6 +321,7 @@ SYM_FUNC_END(ftrace_stub_direct_tramp)
#else /* ! CONFIG_DYNAMIC_FTRACE */
SYM_FUNC_START(__fentry__)
+ ANNOTATE_NOENDBR
CALL_DEPTH_ACCOUNT
cmpq $ftrace_stub, ftrace_trace_function
--- a/arch/x86/kernel/irqflags.S
+++ b/arch/x86/kernel/irqflags.S
@@ -9,6 +9,7 @@
*/
.pushsection .noinstr.text, "ax"
SYM_FUNC_START(native_save_fl)
+ ENDBR
pushf
pop %_ASM_AX
RET
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -106,6 +106,16 @@ static noinstr void pv_native_write_cr2(
native_write_cr2(val);
}
+static noinstr unsigned long pv_native_read_cr3(void)
+{
+ return __native_read_cr3();
+}
+
+static noinstr void pv_native_write_cr3(unsigned long cr3)
+{
+ native_write_cr3(cr3);
+}
+
static noinstr unsigned long pv_native_get_debugreg(int regno)
{
return native_get_debugreg(regno);
@@ -199,8 +209,8 @@ struct paravirt_patch_template pv_ops =
#ifdef CONFIG_PARAVIRT_XXL
.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(pv_native_read_cr2),
.mmu.write_cr2 = pv_native_write_cr2,
- .mmu.read_cr3 = __native_read_cr3,
- .mmu.write_cr3 = native_write_cr3,
+ .mmu.read_cr3 = pv_native_read_cr3,
+ .mmu.write_cr3 = pv_native_write_cr3,
.mmu.pgd_alloc = __paravirt_pgd_alloc,
.mmu.pgd_free = paravirt_nop,
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -326,6 +326,7 @@ SYM_FUNC_END(retbleed_untrain_ret)
#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)
SYM_FUNC_START(entry_untrain_ret)
+ ANNOTATE_NOENDBR
ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO
SYM_FUNC_END(entry_untrain_ret)
__EXPORT_THUNK(entry_untrain_ret)
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -72,6 +72,7 @@ SYM_FUNC_START(sme_encrypt_execute)
SYM_FUNC_END(sme_encrypt_execute)
SYM_FUNC_START(__enc_copy)
+ ANNOTATE_NOENDBR
/*
* Routine used to encrypt memory in place.
* This routine must be run outside of the kernel proper since
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -26,6 +26,7 @@
/* code below belongs to the image kernel */
.align PAGE_SIZE
SYM_FUNC_START(restore_registers)
+ ANNOTATE_NOENDBR
/* go back to the original page tables */
movq %r9, %cr3
@@ -119,6 +120,7 @@ SYM_FUNC_END(restore_image)
/* code below has been relocated to a safe page */
SYM_FUNC_START(core_restore_code)
+ ANNOTATE_NOENDBR
/* switch to temporary page tables */
movq %rax, %cr3
/* flush TLB */
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -28,6 +28,7 @@
* non-zero.
*/
SYM_FUNC_START(xen_irq_disable_direct)
+ ENDBR
movb $1, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
RET
SYM_FUNC_END(xen_irq_disable_direct)
@@ -67,6 +68,7 @@ SYM_FUNC_END(check_events)
* then enter the hypervisor to get them handled.
*/
SYM_FUNC_START(xen_irq_enable_direct)
+ ENDBR
FRAME_BEGIN
/* Unmask events */
movb $0, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
@@ -97,6 +99,7 @@ SYM_FUNC_END(xen_irq_enable_direct)
* x86 use opposite senses (mask vs enable).
*/
SYM_FUNC_START(xen_save_fl_direct)
+ ENDBR
testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
setz %ah
addb %ah, %ah
@@ -104,6 +107,7 @@ SYM_FUNC_START(xen_save_fl_direct)
SYM_FUNC_END(xen_save_fl_direct)
SYM_FUNC_START(xen_read_cr2)
+ ENDBR
FRAME_BEGIN
_ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
_ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
@@ -112,6 +116,7 @@ SYM_FUNC_START(xen_read_cr2)
SYM_FUNC_END(xen_read_cr2);
SYM_FUNC_START(xen_read_cr2_direct)
+ ENDBR
FRAME_BEGIN
_ASM_MOV PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_arch_cr2), %_ASM_AX
FRAME_END
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 3/8] x86/boot: Mark start_secondary() with __noendbr
2024-11-05 11:39 [PATCH 0/8] x86: kCFI and IBT cleanups Peter Zijlstra
2024-11-05 11:39 ` [PATCH 1/8] x86,kcfi: Fix EXPORT_SYMBOL vs kCFI Peter Zijlstra
2024-11-05 11:39 ` [PATCH 2/8] x86/cfi: Clean up linkage Peter Zijlstra
@ 2024-11-05 11:39 ` Peter Zijlstra
2024-11-05 11:39 ` [PATCH 4/8] x86/alternative: Simplify callthunk patching Peter Zijlstra
` (4 subsequent siblings)
7 siblings, 0 replies; 18+ messages in thread
From: Peter Zijlstra @ 2024-11-05 11:39 UTC (permalink / raw)
To: x86
Cc: linux-kernel, peterz, alyssa.milburn, scott.d.constable, joao,
andrew.cooper3, jpoimboe, alexei.starovoitov, ebiggers,
samitolvanen, kees
The handoff between the boot stubs and start_secondary() are before IBT is
enabled and is definitely not subject to kCFI. As such, suppress all that for
this function.
Notably when the ENDBR poison would become fatal (ud1 instead of nop) this will
trigger a tripple fault because we haven't set up the IDT to handle #UD yet.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/kernel/smpboot.c | 3 ++-
include/linux/objtool.h | 13 ++++++++++---
2 files changed, 12 insertions(+), 4 deletions(-)
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -228,7 +228,7 @@ static void ap_calibrate_delay(void)
/*
* Activate a secondary processor.
*/
-static void notrace start_secondary(void *unused)
+static void notrace __noendbr start_secondary(void *unused)
{
/*
* Don't put *anything* except direct CPU state initialization
@@ -313,6 +313,7 @@ static void notrace start_secondary(void
wmb();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
+NOENDBR_SYMBOL(start_secondary);
/*
* The bootstrap kernel entry code has set these up. Save them for
--- a/include/linux/objtool.h
+++ b/include/linux/objtool.h
@@ -45,12 +45,18 @@
#define STACK_FRAME_NON_STANDARD_FP(func)
#endif
-#define ANNOTATE_NOENDBR \
- "986: \n\t" \
+#define __ANNOTATE_NOENDBR(label) \
".pushsection .discard.noendbr\n\t" \
- ".long 986b\n\t" \
+ ".long " #label "\n\t" \
".popsection\n\t"
+#define NOENDBR_SYMBOL(func) \
+ asm(__ANNOTATE_NOENDBR(func))
+
+#define ANNOTATE_NOENDBR \
+ "986: \n\t" \
+ __ANNOTATE_NOENDBR(986b)
+
#define ASM_REACHABLE \
"998:\n\t" \
".pushsection .discard.reachable\n\t" \
@@ -157,6 +163,7 @@
#define STACK_FRAME_NON_STANDARD_FP(func)
#define ANNOTATE_NOENDBR
#define ASM_REACHABLE
+#define NOENDBR_SYMBOL(func)
#else
#define ANNOTATE_INTRA_FUNCTION_CALL
.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 4/8] x86/alternative: Simplify callthunk patching
2024-11-05 11:39 [PATCH 0/8] x86: kCFI and IBT cleanups Peter Zijlstra
` (2 preceding siblings ...)
2024-11-05 11:39 ` [PATCH 3/8] x86/boot: Mark start_secondary() with __noendbr Peter Zijlstra
@ 2024-11-05 11:39 ` Peter Zijlstra
2024-11-05 11:39 ` [PATCH 5/8] x86/traps: Cleanup and robustify decode_bug() Peter Zijlstra
` (3 subsequent siblings)
7 siblings, 0 replies; 18+ messages in thread
From: Peter Zijlstra @ 2024-11-05 11:39 UTC (permalink / raw)
To: x86
Cc: linux-kernel, peterz, alyssa.milburn, scott.d.constable, joao,
andrew.cooper3, jpoimboe, alexei.starovoitov, ebiggers,
samitolvanen, kees
Now that paravirt call patching is implemented using alternatives, it
is possible to avoid having to patch the alternative sites by
including the altinstr_replacement calls in the call_sites list.
This means we're now stacking relative adjustments like so:
callthunks_patch_builtin_calls():
patches all function calls to target: func() -> func()-10
since the CALL accounting lives in the CALL_PADDING.
This explicitly includes .altinstr_replacement
alt_replace_call():
patches: x86_BUG() -> target()
this patching is done in a relative manner, and will preserve
the above adjustment, meaning that with calldepth patching it
will do: x86_BUG()-10 -> target()-10
apply_relocation():
does code relocation, and adjusts all RIP-relative instructions
to the new location, also in a relative manner.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/include/asm/alternative.h | 1 -
arch/x86/kernel/alternative.c | 8 ++++----
arch/x86/kernel/callthunks.c | 13 -------------
arch/x86/kernel/module.c | 17 ++++++-----------
tools/objtool/arch/x86/decode.c | 1 +
tools/objtool/check.c | 12 ++----------
6 files changed, 13 insertions(+), 39 deletions(-)
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -109,7 +109,6 @@ struct module;
struct callthunk_sites {
s32 *call_start, *call_end;
- struct alt_instr *alt_start, *alt_end;
};
#ifdef CONFIG_CALL_THUNKS
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1701,14 +1701,14 @@ void __init alternative_instructions(voi
apply_retpolines(__retpoline_sites, __retpoline_sites_end);
apply_returns(__return_sites, __return_sites_end);
- apply_alternatives(__alt_instructions, __alt_instructions_end);
-
/*
- * Now all calls are established. Apply the call thunks if
- * required.
+ * Adjust all CALL instructions to point to func()-10, including
+ * those in .altinstr_replacement.
*/
callthunks_patch_builtin_calls();
+ apply_alternatives(__alt_instructions, __alt_instructions_end);
+
/*
* Seal all functions that do not have their address taken.
*/
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -239,21 +239,10 @@ patch_call_sites(s32 *start, s32 *end, c
}
static __init_or_module void
-patch_alt_call_sites(struct alt_instr *start, struct alt_instr *end,
- const struct core_text *ct)
-{
- struct alt_instr *a;
-
- for (a = start; a < end; a++)
- patch_call((void *)&a->instr_offset + a->instr_offset, ct);
-}
-
-static __init_or_module void
callthunks_setup(struct callthunk_sites *cs, const struct core_text *ct)
{
prdbg("Patching call sites %s\n", ct->name);
patch_call_sites(cs->call_start, cs->call_end, ct);
- patch_alt_call_sites(cs->alt_start, cs->alt_end, ct);
prdbg("Patching call sites done%s\n", ct->name);
}
@@ -262,8 +251,6 @@ void __init callthunks_patch_builtin_cal
struct callthunk_sites cs = {
.call_start = __call_sites,
.call_end = __call_sites_end,
- .alt_start = __alt_instructions,
- .alt_end = __alt_instructions_end
};
if (!cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -275,12 +275,7 @@ int module_finalize(const Elf_Ehdr *hdr,
void *rseg = (void *)returns->sh_addr;
apply_returns(rseg, rseg + returns->sh_size);
}
- if (alt) {
- /* patch .altinstructions */
- void *aseg = (void *)alt->sh_addr;
- apply_alternatives(aseg, aseg + alt->sh_size);
- }
- if (calls || alt) {
+ if (calls) {
struct callthunk_sites cs = {};
if (calls) {
@@ -288,13 +283,13 @@ int module_finalize(const Elf_Ehdr *hdr,
cs.call_end = (void *)calls->sh_addr + calls->sh_size;
}
- if (alt) {
- cs.alt_start = (void *)alt->sh_addr;
- cs.alt_end = (void *)alt->sh_addr + alt->sh_size;
- }
-
callthunks_patch_module_calls(&cs, me);
}
+ if (alt) {
+ /* patch .altinstructions */
+ void *aseg = (void *)alt->sh_addr;
+ apply_alternatives(aseg, aseg + alt->sh_size);
+ }
if (ibt_endbr) {
void *iseg = (void *)ibt_endbr->sh_addr;
apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size);
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -845,5 +845,6 @@ bool arch_is_rethunk(struct symbol *sym)
bool arch_is_embedded_insn(struct symbol *sym)
{
return !strcmp(sym->name, "retbleed_return_thunk") ||
+ !strcmp(sym->name, "srso_alias_safe_ret") ||
!strcmp(sym->name, "srso_safe_ret");
}
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1410,15 +1410,6 @@ static void annotate_call_site(struct ob
if (!sym)
sym = reloc->sym;
- /*
- * Alternative replacement code is just template code which is
- * sometimes copied to the original instruction. For now, don't
- * annotate it. (In the future we might consider annotating the
- * original instruction if/when it ever makes sense to do so.)
- */
- if (!strcmp(insn->sec->name, ".altinstr_replacement"))
- return;
-
if (sym->static_call_tramp) {
list_add_tail(&insn->call_node, &file->static_call_list);
return;
@@ -1476,7 +1467,8 @@ static void annotate_call_site(struct ob
return;
}
- if (insn->type == INSN_CALL && !insn->sec->init)
+ if (insn->type == INSN_CALL && !insn->sec->init &&
+ !insn->_call_dest->embedded_insn)
list_add_tail(&insn->call_node, &file->call_list);
if (!sibling && dead_end_function(file, sym))
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 5/8] x86/traps: Cleanup and robustify decode_bug()
2024-11-05 11:39 [PATCH 0/8] x86: kCFI and IBT cleanups Peter Zijlstra
` (3 preceding siblings ...)
2024-11-05 11:39 ` [PATCH 4/8] x86/alternative: Simplify callthunk patching Peter Zijlstra
@ 2024-11-05 11:39 ` Peter Zijlstra
2024-11-05 15:19 ` Andrew Cooper
2024-11-05 11:39 ` [PATCH 6/8] x86/ibt: Clean up is_endbr() Peter Zijlstra
` (2 subsequent siblings)
7 siblings, 1 reply; 18+ messages in thread
From: Peter Zijlstra @ 2024-11-05 11:39 UTC (permalink / raw)
To: x86
Cc: linux-kernel, peterz, alyssa.milburn, scott.d.constable, joao,
andrew.cooper3, jpoimboe, alexei.starovoitov, ebiggers,
samitolvanen, kees
Notably, don't attempt to decode an immediate when MOD == 3.
Additionally have it return the instruction length, such that WARN
like bugs can more reliably skip to the correct instruction.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/include/asm/bug.h | 5 +-
arch/x86/include/asm/ibt.h | 4 +-
arch/x86/kernel/cet.c | 18 +++++++---
arch/x86/kernel/traps.c | 77 ++++++++++++++++++++++++++++++++-------------
4 files changed, 73 insertions(+), 31 deletions(-)
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -22,8 +22,9 @@
#define SECOND_BYTE_OPCODE_UD2 0x0b
#define BUG_NONE 0xffff
-#define BUG_UD1 0xfffe
-#define BUG_UD2 0xfffd
+#define BUG_UD2 0xfffe
+#define BUG_UD1 0xfffd
+#define BUG_UD1_UBSAN 0xfffc
#ifdef CONFIG_GENERIC_BUG
--- a/arch/x86/include/asm/ibt.h
+++ b/arch/x86/include/asm/ibt.h
@@ -41,7 +41,7 @@
_ASM_PTR fname "\n\t" \
".popsection\n\t"
-static inline __attribute_const__ u32 gen_endbr(void)
+static __always_inline __attribute_const__ u32 gen_endbr(void)
{
u32 endbr;
@@ -56,7 +56,7 @@ static inline __attribute_const__ u32 ge
return endbr;
}
-static inline __attribute_const__ u32 gen_endbr_poison(void)
+static __always_inline __attribute_const__ u32 gen_endbr_poison(void)
{
/*
* 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -94,10 +94,17 @@ __always_inline int is_valid_bugaddr(uns
/*
* Check for UD1 or UD2, accounting for Address Size Override Prefixes.
- * If it's a UD1, get the ModRM byte to pass along to UBSan.
+ * If it's a UD1, further decode to determine its use:
+ *
+ * UBSan{0}: 67 0f b9 00 ud1 (%eax),%eax
+ * UBSan{10}: 67 0f b9 40 10 ud1 0x10(%eax),%eax
+ * static_call: 0f b9 cc ud1 %esp,%ecx
+ *
+ * Notably UBSAN uses EAX, static_call uses ECX.
*/
-__always_inline int decode_bug(unsigned long addr, u32 *imm)
+__always_inline int decode_bug(unsigned long addr, s32 *imm, int *len)
{
+ unsigned long start = addr;
u8 v;
if (addr < TASK_SIZE_MAX)
@@ -110,24 +117,37 @@ __always_inline int decode_bug(unsigned
return BUG_NONE;
v = *(u8 *)(addr++);
- if (v == SECOND_BYTE_OPCODE_UD2)
+ if (v == SECOND_BYTE_OPCODE_UD2) {
+ *len = addr - start;
return BUG_UD2;
+ }
- if (!IS_ENABLED(CONFIG_UBSAN_TRAP) || v != SECOND_BYTE_OPCODE_UD1)
+ if (v != SECOND_BYTE_OPCODE_UD1)
return BUG_NONE;
- /* Retrieve the immediate (type value) for the UBSAN UD1 */
- v = *(u8 *)(addr++);
- if (X86_MODRM_RM(v) == 4)
- addr++;
-
*imm = 0;
- if (X86_MODRM_MOD(v) == 1)
- *imm = *(u8 *)addr;
- else if (X86_MODRM_MOD(v) == 2)
- *imm = *(u32 *)addr;
- else
- WARN_ONCE(1, "Unexpected MODRM_MOD: %u\n", X86_MODRM_MOD(v));
+ v = *(u8 *)(addr++); /* ModRM */
+
+ /* Decode immediate, if present */
+ if (X86_MODRM_MOD(v) != 3) {
+ if (X86_MODRM_RM(v) == 4)
+ addr++; /* Skip SIB byte */
+
+ if (X86_MODRM_MOD(v) == 1) {
+ *imm = *(s8 *)addr;
+ addr += 1;
+
+ } else if (X86_MODRM_MOD(v) == 2) {
+ *imm = *(s32 *)addr;
+ addr += 4;
+ }
+ }
+
+ /* record instruction length */
+ *len = addr - start;
+
+ if (X86_MODRM_REG(v) == 0) /* EAX */
+ return BUG_UD1_UBSAN;
return BUG_UD1;
}
@@ -258,10 +278,10 @@ static inline void handle_invalid_op(str
static noinstr bool handle_bug(struct pt_regs *regs)
{
bool handled = false;
- int ud_type;
- u32 imm;
+ int ud_type, ud_len;
+ s32 ud_imm;
- ud_type = decode_bug(regs->ip, &imm);
+ ud_type = decode_bug(regs->ip, &ud_imm, &ud_len);
if (ud_type == BUG_NONE)
return handled;
@@ -281,15 +301,28 @@ static noinstr bool handle_bug(struct pt
*/
if (regs->flags & X86_EFLAGS_IF)
raw_local_irq_enable();
- if (ud_type == BUG_UD2) {
+
+ switch (ud_type) {
+ case BUG_UD2:
if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
- regs->ip += LEN_UD2;
+ regs->ip += ud_len;
handled = true;
}
- } else if (IS_ENABLED(CONFIG_UBSAN_TRAP)) {
- pr_crit("%s at %pS\n", report_ubsan_failure(regs, imm), (void *)regs->ip);
+ break;
+
+ case BUG_UD1_UBSAN:
+ if (IS_ENABLED(CONFIG_UBSAN_TRAP)) {
+ pr_crit("%s at %pS\n",
+ report_ubsan_failure(regs, ud_imm),
+ (void *)regs->ip);
+ }
+ break;
+
+ default:
+ break;
}
+
if (regs->flags & X86_EFLAGS_IF)
raw_local_irq_disable();
instrumentation_end();
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 6/8] x86/ibt: Clean up is_endbr()
2024-11-05 11:39 [PATCH 0/8] x86: kCFI and IBT cleanups Peter Zijlstra
` (4 preceding siblings ...)
2024-11-05 11:39 ` [PATCH 5/8] x86/traps: Cleanup and robustify decode_bug() Peter Zijlstra
@ 2024-11-05 11:39 ` Peter Zijlstra
2024-11-05 11:39 ` [PATCH 7/8] x86/ibt: Clean up poison_endbr() Peter Zijlstra
2024-11-05 11:39 ` [PATCH 8/8] x86/early_printk: Harden early_serial Peter Zijlstra
7 siblings, 0 replies; 18+ messages in thread
From: Peter Zijlstra @ 2024-11-05 11:39 UTC (permalink / raw)
To: x86
Cc: linux-kernel, peterz, alyssa.milburn, scott.d.constable, joao,
andrew.cooper3, jpoimboe, alexei.starovoitov, ebiggers,
samitolvanen, kees, Alexei Starovoitov, Andrii Nakryiko
Pretty much every caller of is_endbr() actually wants to test something at an
address and ends up doing get_kernel_nofault(). Fold the lot into a more
convenient helper.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
---
arch/x86/events/core.c | 2 +-
arch/x86/include/asm/ibt.h | 5 +++--
arch/x86/kernel/alternative.c | 20 ++++++++++++++------
arch/x86/kernel/kprobes/core.c | 11 +----------
arch/x86/net/bpf_jit_comp.c | 4 ++--
kernel/trace/bpf_trace.c | 14 ++------------
6 files changed, 23 insertions(+), 33 deletions(-)
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2845,7 +2845,7 @@ static bool is_uprobe_at_func_entry(stru
return true;
/* endbr64 (64-bit only) */
- if (user_64bit_mode(regs) && is_endbr(*(u32 *)auprobe->insn))
+ if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn))
return true;
return false;
--- a/arch/x86/include/asm/ibt.h
+++ b/arch/x86/include/asm/ibt.h
@@ -65,7 +65,7 @@ static __always_inline __attribute_const
return 0x001f0f66; /* osp nopl (%rax) */
}
-static inline bool is_endbr(u32 val)
+static inline bool __is_endbr(u32 val)
{
if (val == gen_endbr_poison())
return true;
@@ -74,6 +74,7 @@ static inline bool is_endbr(u32 val)
return val == gen_endbr();
}
+extern __noendbr bool is_endbr(u32 *val);
extern __noendbr u64 ibt_save(bool disable);
extern __noendbr void ibt_restore(u64 save);
@@ -102,7 +103,7 @@ extern bool __do_kernel_cp_fault(struct
#define __noendbr
-static inline bool is_endbr(u32 val) { return false; }
+static inline bool is_endbr(u32 *val) { return false; }
static inline u64 ibt_save(bool disable) { return 0; }
static inline void ibt_restore(u64 save) { }
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -852,16 +852,24 @@ void __init_or_module noinline apply_ret
#ifdef CONFIG_X86_KERNEL_IBT
+__noendbr bool is_endbr(u32 *val)
+{
+ u32 endbr;
+
+ __get_kernel_nofault(&endbr, val, u32, Efault);
+ return __is_endbr(endbr);
+
+Efault:
+ return false;
+}
+
static void poison_cfi(void *addr);
static void __init_or_module poison_endbr(void *addr, bool warn)
{
- u32 endbr, poison = gen_endbr_poison();
-
- if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
- return;
+ u32 poison = gen_endbr_poison();
- if (!is_endbr(endbr)) {
+ if (!is_endbr(addr)) {
WARN_ON_ONCE(warn);
return;
}
@@ -988,7 +996,7 @@ static u32 cfi_seed __ro_after_init;
static u32 cfi_rehash(u32 hash)
{
hash ^= cfi_seed;
- while (unlikely(is_endbr(hash) || is_endbr(-hash))) {
+ while (unlikely(__is_endbr(hash) || __is_endbr(-hash))) {
bool lsb = hash & 1;
hash >>= 1;
if (lsb)
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -373,16 +373,7 @@ static bool can_probe(unsigned long padd
kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset,
bool *on_func_entry)
{
- u32 insn;
-
- /*
- * Since 'addr' is not guaranteed to be safe to access, use
- * copy_from_kernel_nofault() to read the instruction:
- */
- if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(u32)))
- return NULL;
-
- if (is_endbr(insn)) {
+ if (is_endbr((u32 *)addr)) {
*on_func_entry = !offset || offset == 4;
if (*on_func_entry)
offset = 4;
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -625,7 +625,7 @@ int bpf_arch_text_poke(void *ip, enum bp
* See emit_prologue(), for IBT builds the trampoline hook is preceded
* with an ENDBR instruction.
*/
- if (is_endbr(*(u32 *)ip))
+ if (is_endbr(ip))
ip += ENDBR_INSN_SIZE;
return __bpf_arch_text_poke(ip, t, old_addr, new_addr);
@@ -2971,7 +2971,7 @@ static int __arch_prepare_bpf_trampoline
/* skip patched call instruction and point orig_call to actual
* body of the kernel function.
*/
- if (is_endbr(*(u32 *)orig_call))
+ if (is_endbr(orig_call))
orig_call += ENDBR_INSN_SIZE;
orig_call += X86_PATCH_SIZE;
}
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1027,19 +1027,9 @@ static const struct bpf_func_proto bpf_g
#ifdef CONFIG_X86_KERNEL_IBT
static unsigned long get_entry_ip(unsigned long fentry_ip)
{
- u32 instr;
+ if (is_endbr((void *)(fentry_ip - ENDBR_INSN_SIZE)))
+ return fentry_ip - ENDBR_INSN_SIZE;
- /* We want to be extra safe in case entry ip is on the page edge,
- * but otherwise we need to avoid get_kernel_nofault()'s overhead.
- */
- if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) {
- if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE)))
- return fentry_ip;
- } else {
- instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE);
- }
- if (is_endbr(instr))
- fentry_ip -= ENDBR_INSN_SIZE;
return fentry_ip;
}
#else
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 7/8] x86/ibt: Clean up poison_endbr()
2024-11-05 11:39 [PATCH 0/8] x86: kCFI and IBT cleanups Peter Zijlstra
` (5 preceding siblings ...)
2024-11-05 11:39 ` [PATCH 6/8] x86/ibt: Clean up is_endbr() Peter Zijlstra
@ 2024-11-05 11:39 ` Peter Zijlstra
2024-11-05 11:39 ` [PATCH 8/8] x86/early_printk: Harden early_serial Peter Zijlstra
7 siblings, 0 replies; 18+ messages in thread
From: Peter Zijlstra @ 2024-11-05 11:39 UTC (permalink / raw)
To: x86
Cc: linux-kernel, peterz, alyssa.milburn, scott.d.constable, joao,
andrew.cooper3, jpoimboe, alexei.starovoitov, ebiggers,
samitolvanen, kees
Basically, get rid of the .warn argument and explicitly don't call the
function when we know there isn't an endbr. This makes the calling
code clearer.
Note: perhaps don't add functions to .cfi_sites when the function
doesn't have endbr -- OTOH why would the compiler emit the prefix if
it has already determined there are no indirect callers and has
omitted the ENDBR instruction.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/kernel/alternative.c | 42 +++++++++++++++++++++++++++++++++++-------
1 file changed, 35 insertions(+), 7 deletions(-)
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -864,14 +864,12 @@ __noendbr bool is_endbr(u32 *val)
static void poison_cfi(void *addr);
-static void __init_or_module poison_endbr(void *addr, bool warn)
+static void __init_or_module poison_endbr(void *addr)
{
u32 poison = gen_endbr_poison();
- if (!is_endbr(addr)) {
- WARN_ON_ONCE(warn);
+ if (WARN_ON_ONCE(!is_endbr(addr)))
return;
- }
DPRINTK(ENDBR, "ENDBR at: %pS (%px)", addr, addr);
@@ -896,7 +894,7 @@ void __init_or_module noinline apply_sea
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
- poison_endbr(addr, true);
+ poison_endbr(addr);
if (IS_ENABLED(CONFIG_FINEIBT))
poison_cfi(addr - 16);
}
@@ -1203,6 +1201,14 @@ static int cfi_rewrite_preamble(s32 *sta
void *addr = (void *)s + *s;
u32 hash;
+ /*
+ * When the function doesn't start with ENDBR the compiler will
+ * have determined there are no indirect calls to it and we
+ * don't need no CFI either.
+ */
+ if (!is_endbr(addr + 16))
+ continue;
+
hash = decode_preamble_hash(addr);
if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
addr, addr, 5, addr))
@@ -1223,7 +1229,10 @@ static void cfi_rewrite_endbr(s32 *start
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
- poison_endbr(addr+16, false);
+ if (!is_endbr(addr + 16))
+ continue;
+
+ poison_endbr(addr + 16);
}
}
@@ -1356,9 +1365,23 @@ static inline void poison_hash(void *add
static void poison_cfi(void *addr)
{
+ /*
+ * Compilers manage to be inconsistent with ENDBR vs __cfi prefixes,
+ * some (static) functions for which they can determine the address
+ * is never taken do not get a __cfi prefix, but *DO* get an ENDBR.
+ *
+ * As such, these functions will get sealed, but we need to be careful
+ * to not unconditionally scribble the previous function.
+ */
switch (cfi_mode) {
case CFI_FINEIBT:
/*
+ * FineIBT prefix should start with an ENDBR.
+ */
+ if (!is_endbr(addr))
+ break;
+
+ /*
* __cfi_\func:
* osp nopl (%rax)
* subl $0, %r10d
@@ -1366,12 +1389,17 @@ static void poison_cfi(void *addr)
* ud2
* 1: nop
*/
- poison_endbr(addr, false);
+ poison_endbr(addr);
poison_hash(addr + fineibt_preamble_hash);
break;
case CFI_KCFI:
/*
+ * kCFI prefix should start with a valid hash.
+ */
+ if (!decode_preamble_hash(addr))
+ break;
+ /*
* __cfi_\func:
* movl $0, %eax
* .skip 11, 0x90
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH 8/8] x86/early_printk: Harden early_serial
2024-11-05 11:39 [PATCH 0/8] x86: kCFI and IBT cleanups Peter Zijlstra
` (6 preceding siblings ...)
2024-11-05 11:39 ` [PATCH 7/8] x86/ibt: Clean up poison_endbr() Peter Zijlstra
@ 2024-11-05 11:39 ` Peter Zijlstra
7 siblings, 0 replies; 18+ messages in thread
From: Peter Zijlstra @ 2024-11-05 11:39 UTC (permalink / raw)
To: x86
Cc: linux-kernel, peterz, alyssa.milburn, scott.d.constable, joao,
andrew.cooper3, jpoimboe, alexei.starovoitov, ebiggers,
samitolvanen, kees
Scott found that mem32_serial_in() is an ideal speculation gadget, an
indirectly callable function that takes an adddress and offset and
immediately does a load.
Use static_call() to take away the need for indirect calls and
explicitly seal the functions to ensure they're not callable on IBT
enabled parts.
Reported-by: Scott Constable <scott.d.constable@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
arch/x86/kernel/early_printk.c | 51 ++++++++++++++++++++---------------------
1 file changed, 25 insertions(+), 26 deletions(-)
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -19,6 +19,7 @@
#include <linux/usb/ehci_def.h>
#include <linux/usb/xhci-dbgp.h>
#include <asm/pci_x86.h>
+#include <linux/static_call.h>
/* Simple VGA output */
#define VGABASE (__ISA_IO_base + 0xb8000)
@@ -94,26 +95,28 @@ static unsigned long early_serial_base =
#define DLL 0 /* Divisor Latch Low */
#define DLH 1 /* Divisor latch High */
-static unsigned int io_serial_in(unsigned long addr, int offset)
+static __noendbr unsigned int io_serial_in(unsigned long addr, int offset)
{
return inb(addr + offset);
}
+NOENDBR_SYMBOL(io_serial_in);
-static void io_serial_out(unsigned long addr, int offset, int value)
+static __noendbr void io_serial_out(unsigned long addr, int offset, int value)
{
outb(value, addr + offset);
}
+NOENDBR_SYMBOL(io_serial_out);
-static unsigned int (*serial_in)(unsigned long addr, int offset) = io_serial_in;
-static void (*serial_out)(unsigned long addr, int offset, int value) = io_serial_out;
+DEFINE_STATIC_CALL(serial_in, io_serial_in);
+DEFINE_STATIC_CALL(serial_out, io_serial_out);
static int early_serial_putc(unsigned char ch)
{
unsigned timeout = 0xffff;
- while ((serial_in(early_serial_base, LSR) & XMTRDY) == 0 && --timeout)
+ while ((static_call(serial_in)(early_serial_base, LSR) & XMTRDY) == 0 && --timeout)
cpu_relax();
- serial_out(early_serial_base, TXR, ch);
+ static_call(serial_out)(early_serial_base, TXR, ch);
return timeout ? 0 : -1;
}
@@ -131,16 +134,16 @@ static __init void early_serial_hw_init(
{
unsigned char c;
- serial_out(early_serial_base, LCR, 0x3); /* 8n1 */
- serial_out(early_serial_base, IER, 0); /* no interrupt */
- serial_out(early_serial_base, FCR, 0); /* no fifo */
- serial_out(early_serial_base, MCR, 0x3); /* DTR + RTS */
-
- c = serial_in(early_serial_base, LCR);
- serial_out(early_serial_base, LCR, c | DLAB);
- serial_out(early_serial_base, DLL, divisor & 0xff);
- serial_out(early_serial_base, DLH, (divisor >> 8) & 0xff);
- serial_out(early_serial_base, LCR, c & ~DLAB);
+ static_call(serial_out)(early_serial_base, LCR, 0x3); /* 8n1 */
+ static_call(serial_out)(early_serial_base, IER, 0); /* no interrupt */
+ static_call(serial_out)(early_serial_base, FCR, 0); /* no fifo */
+ static_call(serial_out)(early_serial_base, MCR, 0x3); /* DTR + RTS */
+
+ c = static_call(serial_in)(early_serial_base, LCR);
+ static_call(serial_out)(early_serial_base, LCR, c | DLAB);
+ static_call(serial_out)(early_serial_base, DLL, divisor & 0xff);
+ static_call(serial_out)(early_serial_base, DLH, (divisor >> 8) & 0xff);
+ static_call(serial_out)(early_serial_base, LCR, c & ~DLAB);
}
#define DEFAULT_BAUD 9600
@@ -183,28 +186,26 @@ static __init void early_serial_init(cha
/* Convert from baud to divisor value */
divisor = 115200 / baud;
- /* These will always be IO based ports */
- serial_in = io_serial_in;
- serial_out = io_serial_out;
-
/* Set up the HW */
early_serial_hw_init(divisor);
}
#ifdef CONFIG_PCI
-static void mem32_serial_out(unsigned long addr, int offset, int value)
+static __noendbr void mem32_serial_out(unsigned long addr, int offset, int value)
{
u32 __iomem *vaddr = (u32 __iomem *)addr;
/* shift implied by pointer type */
writel(value, vaddr + offset);
}
+NOENDBR_SYMBOL(mem32_serial_out);
-static unsigned int mem32_serial_in(unsigned long addr, int offset)
+static __noendbr unsigned int mem32_serial_in(unsigned long addr, int offset)
{
u32 __iomem *vaddr = (u32 __iomem *)addr;
/* shift implied by pointer type */
return readl(vaddr + offset);
}
+NOENDBR_SYMBOL(mem32_serial_in);
/*
* early_pci_serial_init()
@@ -278,15 +279,13 @@ static __init void early_pci_serial_init
*/
if ((bar0 & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) {
/* it is IO mapped */
- serial_in = io_serial_in;
- serial_out = io_serial_out;
early_serial_base = bar0 & PCI_BASE_ADDRESS_IO_MASK;
write_pci_config(bus, slot, func, PCI_COMMAND,
cmdreg|PCI_COMMAND_IO);
} else {
/* It is memory mapped - assume 32-bit alignment */
- serial_in = mem32_serial_in;
- serial_out = mem32_serial_out;
+ static_call_update(serial_in, mem32_serial_in);
+ static_call_update(serial_out, mem32_serial_out);
/* WARNING! assuming the address is always in the first 4G */
early_serial_base =
(unsigned long)early_ioremap(bar0 & PCI_BASE_ADDRESS_MEM_MASK, 0x10);
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 1/8] x86,kcfi: Fix EXPORT_SYMBOL vs kCFI
2024-11-05 11:39 ` [PATCH 1/8] x86,kcfi: Fix EXPORT_SYMBOL vs kCFI Peter Zijlstra
@ 2024-11-05 14:16 ` Christoph Hellwig
2024-11-05 14:27 ` Peter Zijlstra
0 siblings, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2024-11-05 14:16 UTC (permalink / raw)
To: Peter Zijlstra
Cc: x86, linux-kernel, alyssa.milburn, scott.d.constable, joao,
andrew.cooper3, jpoimboe, alexei.starovoitov, ebiggers,
samitolvanen, kees
On Tue, Nov 05, 2024 at 12:39:02PM +0100, Peter Zijlstra wrote:
> The expectation is that all EXPORT'ed symbols are free to have their
> address taken and called indirectly.
I don't think that is the case at all. The is a relatively small number
of exported symbols that are called indirectly. I'd much rather mark
those explicitly.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 1/8] x86,kcfi: Fix EXPORT_SYMBOL vs kCFI
2024-11-05 14:16 ` Christoph Hellwig
@ 2024-11-05 14:27 ` Peter Zijlstra
2024-11-05 14:32 ` Christoph Hellwig
0 siblings, 1 reply; 18+ messages in thread
From: Peter Zijlstra @ 2024-11-05 14:27 UTC (permalink / raw)
To: Christoph Hellwig
Cc: x86, linux-kernel, alyssa.milburn, scott.d.constable, joao,
andrew.cooper3, jpoimboe, alexei.starovoitov, ebiggers,
samitolvanen, kees
On Tue, Nov 05, 2024 at 06:16:01AM -0800, Christoph Hellwig wrote:
> On Tue, Nov 05, 2024 at 12:39:02PM +0100, Peter Zijlstra wrote:
> > The expectation is that all EXPORT'ed symbols are free to have their
> > address taken and called indirectly.
>
> I don't think that is the case at all. The is a relatively small number
> of exported symbols that are called indirectly. I'd much rather mark
> those explicitly.
I'm not claiming they have their address taken -- just saying that
traditionally this has always been a valid thing to do.
Anyway, I raised this point last time, and I think back then the
consensus was to explicitly mark those you should not be able to call.
But irrespective of all that, this just makes sure all the .S functions
are on equal footing with the C functions as generated by the compiler.
Once that's done, we can look at adding to the EXPORT family.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 1/8] x86,kcfi: Fix EXPORT_SYMBOL vs kCFI
2024-11-05 14:27 ` Peter Zijlstra
@ 2024-11-05 14:32 ` Christoph Hellwig
2024-11-05 14:58 ` Peter Zijlstra
0 siblings, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2024-11-05 14:32 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Christoph Hellwig, x86, linux-kernel, alyssa.milburn,
scott.d.constable, joao, andrew.cooper3, jpoimboe,
alexei.starovoitov, ebiggers, samitolvanen, kees
On Tue, Nov 05, 2024 at 03:27:20PM +0100, Peter Zijlstra wrote:
> > I don't think that is the case at all. The is a relatively small number
> > of exported symbols that are called indirectly. I'd much rather mark
> > those explicitly.
>
> I'm not claiming they have their address taken -- just saying that
> traditionally this has always been a valid thing to do.
>
> Anyway, I raised this point last time, and I think back then the
> consensus was to explicitly mark those you should not be able to call.
Who came to that consensus? There really is just a relatively well
bounded number of functions that are used as either default methods
or as ready made callbacks. Everything else has no business being
called indirectly. While disallowing this might be a bit of work,
I think it would be a great security improvement.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 1/8] x86,kcfi: Fix EXPORT_SYMBOL vs kCFI
2024-11-05 14:32 ` Christoph Hellwig
@ 2024-11-05 14:58 ` Peter Zijlstra
2024-11-05 15:41 ` Christoph Hellwig
2024-11-09 9:14 ` David Laight
0 siblings, 2 replies; 18+ messages in thread
From: Peter Zijlstra @ 2024-11-05 14:58 UTC (permalink / raw)
To: Christoph Hellwig
Cc: x86, linux-kernel, alyssa.milburn, scott.d.constable, joao,
andrew.cooper3, jpoimboe, alexei.starovoitov, ebiggers,
samitolvanen, kees
On Tue, Nov 05, 2024 at 06:32:12AM -0800, Christoph Hellwig wrote:
> On Tue, Nov 05, 2024 at 03:27:20PM +0100, Peter Zijlstra wrote:
> > > I don't think that is the case at all. The is a relatively small number
> > > of exported symbols that are called indirectly. I'd much rather mark
> > > those explicitly.
> >
> > I'm not claiming they have their address taken -- just saying that
> > traditionally this has always been a valid thing to do.
> >
> > Anyway, I raised this point last time, and I think back then the
> > consensus was to explicitly mark those you should not be able to call.
>
> Who came to that consensus? There really is just a relatively well
The people who found that thread.
> bounded number of functions that are used as either default methods
> or as ready made callbacks. Everything else has no business being
> called indirectly. While disallowing this might be a bit of work,
> I think it would be a great security improvement.
Well, we don't disagree. But since most of the EXPORT'ed functions are
done in C, we need something that works there too.
I think the idea was that we add EXPORT_SYMBOL{,_GPL}_SEALED() and go
convert everything over to that.
Anyway, 0-day just informed me that this patch has a wee build issue :-/
That robot always waits until after you post to tell you.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 5/8] x86/traps: Cleanup and robustify decode_bug()
2024-11-05 11:39 ` [PATCH 5/8] x86/traps: Cleanup and robustify decode_bug() Peter Zijlstra
@ 2024-11-05 15:19 ` Andrew Cooper
0 siblings, 0 replies; 18+ messages in thread
From: Andrew Cooper @ 2024-11-05 15:19 UTC (permalink / raw)
To: Peter Zijlstra, x86
Cc: linux-kernel, alyssa.milburn, scott.d.constable, joao, jpoimboe,
alexei.starovoitov, ebiggers, samitolvanen, kees
On 05/11/2024 11:39 am, Peter Zijlstra wrote:
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -110,24 +117,37 @@ __always_inline int decode_bug(unsigned
> return BUG_NONE;
>
> v = *(u8 *)(addr++);
> - if (v == SECOND_BYTE_OPCODE_UD2)
> + if (v == SECOND_BYTE_OPCODE_UD2) {
> + *len = addr - start;
> return BUG_UD2;
> + }
>
> - if (!IS_ENABLED(CONFIG_UBSAN_TRAP) || v != SECOND_BYTE_OPCODE_UD1)
> + if (v != SECOND_BYTE_OPCODE_UD1)
> return BUG_NONE;
>
> - /* Retrieve the immediate (type value) for the UBSAN UD1 */
> - v = *(u8 *)(addr++);
> - if (X86_MODRM_RM(v) == 4)
> - addr++;
> -
> *imm = 0;
> - if (X86_MODRM_MOD(v) == 1)
> - *imm = *(u8 *)addr;
> - else if (X86_MODRM_MOD(v) == 2)
> - *imm = *(u32 *)addr;
> - else
> - WARN_ONCE(1, "Unexpected MODRM_MOD: %u\n", X86_MODRM_MOD(v));
> + v = *(u8 *)(addr++); /* ModRM */
> +
> + /* Decode immediate, if present */
> + if (X86_MODRM_MOD(v) != 3) {
> + if (X86_MODRM_RM(v) == 4)
> + addr++; /* Skip SIB byte */
> +
> + if (X86_MODRM_MOD(v) == 1) {
> + *imm = *(s8 *)addr;
> + addr += 1;
> +
> + } else if (X86_MODRM_MOD(v) == 2) {
> + *imm = *(s32 *)addr;
> + addr += 4;
> + }
> + }
> +
> + /* record instruction length */
> + *len = addr - start;
`ud1 0(%rip),%eax` has something to say about this length calculation[1].
You need the Mod = 0, RM = 5 case wired into addr += 4 without filling
in imm.
~Andrew
[1] or maybe you've got something rude to say about those of us who
encode instructions like that...[2]
[2] It's perhaps fortunate that decode_bug() doesn't know what a REX
prefix is.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 1/8] x86,kcfi: Fix EXPORT_SYMBOL vs kCFI
2024-11-05 14:58 ` Peter Zijlstra
@ 2024-11-05 15:41 ` Christoph Hellwig
2024-11-05 15:47 ` Peter Zijlstra
2024-11-09 9:14 ` David Laight
1 sibling, 1 reply; 18+ messages in thread
From: Christoph Hellwig @ 2024-11-05 15:41 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Christoph Hellwig, x86, linux-kernel, alyssa.milburn,
scott.d.constable, joao, andrew.cooper3, jpoimboe,
alexei.starovoitov, ebiggers, samitolvanen, kees
On Tue, Nov 05, 2024 at 03:58:42PM +0100, Peter Zijlstra wrote:
> > bounded number of functions that are used as either default methods
> > or as ready made callbacks. Everything else has no business being
> > called indirectly. While disallowing this might be a bit of work,
> > I think it would be a great security improvement.
>
> Well, we don't disagree. But since most of the EXPORT'ed functions are
> done in C, we need something that works there too.
Yes, absolutely. In fact I doubt there are more than a handful of
assembly exports that are valid targets for indirect calls.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 1/8] x86,kcfi: Fix EXPORT_SYMBOL vs kCFI
2024-11-05 15:41 ` Christoph Hellwig
@ 2024-11-05 15:47 ` Peter Zijlstra
2024-11-05 16:11 ` Peter Zijlstra
0 siblings, 1 reply; 18+ messages in thread
From: Peter Zijlstra @ 2024-11-05 15:47 UTC (permalink / raw)
To: Christoph Hellwig
Cc: x86, linux-kernel, alyssa.milburn, scott.d.constable, joao,
andrew.cooper3, jpoimboe, alexei.starovoitov, ebiggers,
samitolvanen, kees
On Tue, Nov 05, 2024 at 07:41:13AM -0800, Christoph Hellwig wrote:
> On Tue, Nov 05, 2024 at 03:58:42PM +0100, Peter Zijlstra wrote:
> > > bounded number of functions that are used as either default methods
> > > or as ready made callbacks. Everything else has no business being
> > > called indirectly. While disallowing this might be a bit of work,
> > > I think it would be a great security improvement.
> >
> > Well, we don't disagree. But since most of the EXPORT'ed functions are
> > done in C, we need something that works there too.
>
> Yes, absolutely. In fact I doubt there are more than a handful of
> assembly exports that are valid targets for indirect calls.
Yeah, I went overboard here. Let me tone it down a little.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH 1/8] x86,kcfi: Fix EXPORT_SYMBOL vs kCFI
2024-11-05 15:47 ` Peter Zijlstra
@ 2024-11-05 16:11 ` Peter Zijlstra
0 siblings, 0 replies; 18+ messages in thread
From: Peter Zijlstra @ 2024-11-05 16:11 UTC (permalink / raw)
To: Christoph Hellwig
Cc: x86, linux-kernel, alyssa.milburn, scott.d.constable, joao,
andrew.cooper3, jpoimboe, alexei.starovoitov, ebiggers,
samitolvanen, kees
On Tue, Nov 05, 2024 at 04:47:40PM +0100, Peter Zijlstra wrote:
> On Tue, Nov 05, 2024 at 07:41:13AM -0800, Christoph Hellwig wrote:
> > On Tue, Nov 05, 2024 at 03:58:42PM +0100, Peter Zijlstra wrote:
> > > > bounded number of functions that are used as either default methods
> > > > or as ready made callbacks. Everything else has no business being
> > > > called indirectly. While disallowing this might be a bit of work,
> > > > I think it would be a great security improvement.
> > >
> > > Well, we don't disagree. But since most of the EXPORT'ed functions are
> > > done in C, we need something that works there too.
> >
> > Yes, absolutely. In fact I doubt there are more than a handful of
> > assembly exports that are valid targets for indirect calls.
>
> Yeah, I went overboard here. Let me tone it down a little.
OK, I removed a ton of them and fixed the build fallout. I've pushed it
out to the git tree mentioned somewhere, and will re-post in a few days.
^ permalink raw reply [flat|nested] 18+ messages in thread
* RE: [PATCH 1/8] x86,kcfi: Fix EXPORT_SYMBOL vs kCFI
2024-11-05 14:58 ` Peter Zijlstra
2024-11-05 15:41 ` Christoph Hellwig
@ 2024-11-09 9:14 ` David Laight
1 sibling, 0 replies; 18+ messages in thread
From: David Laight @ 2024-11-09 9:14 UTC (permalink / raw)
To: 'Peter Zijlstra', Christoph Hellwig
Cc: x86@kernel.org, linux-kernel@vger.kernel.org,
alyssa.milburn@intel.com, scott.d.constable@intel.com,
joao@overdrivepizza.com, andrew.cooper3@citrix.com,
jpoimboe@kernel.org, alexei.starovoitov@gmail.com,
ebiggers@kernel.org, samitolvanen@google.com, kees@kernel.org
From: Peter Zijlstra
> Sent: 05 November 2024 14:59
>
> On Tue, Nov 05, 2024 at 06:32:12AM -0800, Christoph Hellwig wrote:
> > On Tue, Nov 05, 2024 at 03:27:20PM +0100, Peter Zijlstra wrote:
> > > > I don't think that is the case at all. The is a relatively small number
> > > > of exported symbols that are called indirectly. I'd much rather mark
> > > > those explicitly.
> > >
> > > I'm not claiming they have their address taken -- just saying that
> > > traditionally this has always been a valid thing to do.
> > >
> > > Anyway, I raised this point last time, and I think back then the
> > > consensus was to explicitly mark those you should not be able to call.
> >
> > Who came to that consensus? There really is just a relatively well
>
> The people who found that thread.
>
> > bounded number of functions that are used as either default methods
> > or as ready made callbacks. Everything else has no business being
> > called indirectly. While disallowing this might be a bit of work,
> > I think it would be a great security improvement.
>
> Well, we don't disagree. But since most of the EXPORT'ed functions are
> done in C, we need something that works there too.
>
> I think the idea was that we add EXPORT_SYMBOL{,_GPL}_SEALED() and go
> convert everything over to that.
Don't you really need a compiler attribute that makes a function
indirectly callable?
(Instead of the compiler trying to sort it out itself?)
With the default being 'not indirectly callable' unless the function
address is taken in the same compilation unit as its definition.
If added to the declaration (in the .h file) the compiler would be
able to error code that takes the address of such functions.
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2024-11-09 9:14 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-11-05 11:39 [PATCH 0/8] x86: kCFI and IBT cleanups Peter Zijlstra
2024-11-05 11:39 ` [PATCH 1/8] x86,kcfi: Fix EXPORT_SYMBOL vs kCFI Peter Zijlstra
2024-11-05 14:16 ` Christoph Hellwig
2024-11-05 14:27 ` Peter Zijlstra
2024-11-05 14:32 ` Christoph Hellwig
2024-11-05 14:58 ` Peter Zijlstra
2024-11-05 15:41 ` Christoph Hellwig
2024-11-05 15:47 ` Peter Zijlstra
2024-11-05 16:11 ` Peter Zijlstra
2024-11-09 9:14 ` David Laight
2024-11-05 11:39 ` [PATCH 2/8] x86/cfi: Clean up linkage Peter Zijlstra
2024-11-05 11:39 ` [PATCH 3/8] x86/boot: Mark start_secondary() with __noendbr Peter Zijlstra
2024-11-05 11:39 ` [PATCH 4/8] x86/alternative: Simplify callthunk patching Peter Zijlstra
2024-11-05 11:39 ` [PATCH 5/8] x86/traps: Cleanup and robustify decode_bug() Peter Zijlstra
2024-11-05 15:19 ` Andrew Cooper
2024-11-05 11:39 ` [PATCH 6/8] x86/ibt: Clean up is_endbr() Peter Zijlstra
2024-11-05 11:39 ` [PATCH 7/8] x86/ibt: Clean up poison_endbr() Peter Zijlstra
2024-11-05 11:39 ` [PATCH 8/8] x86/early_printk: Harden early_serial Peter Zijlstra
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.