From: Peter Zijlstra <peterz@infradead.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>,
tglx@kernel.org, mingo@redhat.com, bp@alien8.de,
Nathan Chancellor <nathan@kernel.org>,
Calvin Owens <calvin@wbinvd.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
x86-ML <x86@kernel.org>, LKML <linux-kernel@vger.kernel.org>
Subject: Re: 8aeb879baf12 - significant system call latency regression, bisected
Date: Fri, 19 Jun 2026 10:14:27 +0200 [thread overview]
Message-ID: <20260619081427.GD49529@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <20260617123718.GM49951@noisy.programming.kicks-ass.net>
On Wed, Jun 17, 2026 at 02:37:18PM +0200, Peter Zijlstra wrote:
> - makes -fno-jump-tables unconditional
> - removes array_index_nospec() from the syscall dispatch
FWIW, this also allows making all SYSCALLs __noendbr, very much
including the 'legacy' sys_call_table :-)
Compile tested with IA32_EMULATION=n and reliably yields:
vmlinux.o: warning: objtool: sys_call_table+0x0: data relocation to !ENDBR: __x64_sys_read+0x0
...
vmlinux.o: warning: objtool: sys_call_table+0xeb0: data relocation to !ENDBR: __x64_sys_listns+0x0
(which is just one little objtool patch away from being fixed)
and boots fine (in kvm).
---
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 598f178102ee..b154a2a20eb2 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -90,17 +90,8 @@ CC_FLAGS_FPU += -mhard-float
endif
ifeq ($(CONFIG_X86_KERNEL_IBT),y)
-#
-# Kernel IBT has S_CET.NOTRACK_EN=0, as such the compilers must not generate
-# NOTRACK prefixes. Current generation compilers unconditionally employ NOTRACK
-# for jump-tables, as such, disable jump-tables for now.
-#
-# (jump-tables are implicitly disabled by RETPOLINE)
-#
-# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104816
-#
-KBUILD_CFLAGS += $(call cc-option,-fcf-protection=branch -fno-jump-tables)
-KBUILD_RUSTFLAGS += -Zcf-protection=branch $(if $(call rustc-min-version,109300),-Cjump-tables=n,-Zno-jump-tables)
+KBUILD_CFLAGS += $(call cc-option,-fcf-protection=branch)
+KBUILD_RUSTFLAGS += -Zcf-protection=branch
else
KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
endif
@@ -173,6 +164,13 @@ endif
KBUILD_RUSTFLAGS += -Ccode-model=kernel
percpu_seg := gs
+
+ # Due to retpolines and cf-protection=branch's implicit NOTRACK usage
+ # for jump-tables, blanket disable jump-tables for all x86_64 builds to
+ # get a consistent behaviour across configurations. This allows
+ # removing some array_index_nospec() usage.
+ KBUILD_CFLAGS += -fno-jump-tables
+ KBUILD_RUSTFLAGS += $(if $(call rustc-min-version,109300),-Cjump-tables=n,-Zno-jump-tables)
endif
ifeq ($(CONFIG_STACKPROTECTOR),y)
@@ -209,15 +207,6 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
ifdef CONFIG_MITIGATION_RETPOLINE
KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
KBUILD_RUSTFLAGS += $(RETPOLINE_RUSTFLAGS)
- # Additionally, avoid generating expensive indirect jumps which
- # are subject to retpolines for small number of switch cases.
- # LLVM turns off jump table generation by default when under
- # retpoline builds, however, gcc does not for x86. This has
- # only been fixed starting from gcc stable version 8.4.0 and
- # onwards, but not for older ones. See gcc bug #86952.
- ifndef CONFIG_CC_IS_CLANG
- KBUILD_CFLAGS += -fno-jump-tables
- endif
endif
ifdef CONFIG_MITIGATION_SLS
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 71f032504e73..7e87947e12be 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -8,9 +8,10 @@
#include <linux/entry-common.h>
#include <linux/nospec.h>
#include <asm/syscall.h>
+#include <asm/ibt.h>
-#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
-#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *);
+#define __SYSCALL(nr, sym) extern __noendbr long __x64_##sym(const struct pt_regs *);
+#define __SYSCALL_NORETURN(nr, sym) extern __noendbr long __noreturn __x64_##sym(const struct pt_regs *);
#include <asm/syscalls_64.h>
#ifdef CONFIG_X86_X32_ABI
#include <asm/syscalls_x32.h>
@@ -25,30 +26,47 @@
* kernel/trace/trace_syscalls.c still wants to know the system
* call address.
*/
-#define __SYSCALL(nr, sym) __x64_##sym,
+#define __SYSCALL(nr, sym) (void *)&__x64_##sym,
const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_64.h>
};
#undef __SYSCALL
#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
-long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
+static noinstr long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
{
+ /*
+ * Because of -fno-jump-tables, this compiles into a binary branch tree
+ * rather than a jump-table. As such @nr is not used as an array
+ * index. Additionally, this is an out-of-line function on purpose,
+ * such that all the actual syscall function calls are tail-calls,
+ * returning to our caller for the common bits.
+ */
+ instrumentation_begin();
switch (nr) {
#include <asm/syscalls_64.h>
default: return __x64_sys_ni_syscall(regs);
}
+ instrumentation_end();
}
#ifdef CONFIG_X86_X32_ABI
-long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+static noinstr long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
{
+ instrumentation_begin();
switch (nr) {
#include <asm/syscalls_x32.h>
default: return __x64_sys_ni_syscall(regs);
}
+ instrumentation_end();
+}
+#else
+static __always_inline long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ return __x64_sys_ni_syscall(regs);
}
#endif
+#undef __SYSCALL
static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
{
@@ -59,7 +77,6 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
unsigned int unr = nr;
if (likely(unr < NR_syscalls)) {
- unr = array_index_nospec(unr, NR_syscalls);
regs->ax = x64_sys_call(regs, unr);
return true;
}
@@ -76,7 +93,6 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
unsigned int xnr = nr - __X32_SYSCALL_BIT;
if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
- xnr = array_index_nospec(xnr, X32_NR_syscalls);
regs->ax = x32_sys_call(regs, xnr);
return true;
}
@@ -84,7 +100,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
}
/* Returns true to return using SYSRET, or false to use IRET */
-__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
+__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
{
nr = syscall_enter_from_user_mode(regs, nr);
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index 7e88705e907f..1639fbc02680 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -7,9 +7,10 @@
#define _ASM_X86_SYSCALL_WRAPPER_H
#include <asm/ptrace.h>
+#include <asm/ibt.h>
-extern long __x64_sys_ni_syscall(const struct pt_regs *regs);
-extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
+extern __noendbr long __x64_sys_ni_syscall(const struct pt_regs *regs);
+extern __noendbr long __ia32_sys_ni_syscall(const struct pt_regs *regs);
/*
* Instead of the generic __SYSCALL_DEFINEx() definition, the x86 version takes
@@ -83,15 +84,15 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
__MAP(x, __SC_TYPE, __VA_ARGS__)) \
#define __SYS_STUB0(abi, name) \
- long __##abi##_##name(const struct pt_regs *regs); \
+ long __noendbr __##abi##_##name(const struct pt_regs *regs); \
ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \
- long __##abi##_##name(const struct pt_regs *regs) \
+ long __noendbr __##abi##_##name(const struct pt_regs *regs) \
__alias(__do_##name);
#define __SYS_STUBx(abi, name, ...) \
- long __##abi##_##name(const struct pt_regs *regs); \
+ long __noendbr __##abi##_##name(const struct pt_regs *regs); \
ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \
- long __##abi##_##name(const struct pt_regs *regs) \
+ long __noendbr __##abi##_##name(const struct pt_regs *regs) \
{ \
return __se_##name(__VA_ARGS__); \
}
@@ -257,8 +258,8 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
* For VSYSCALLS, we need to declare these three syscalls with the new
* pt_regs-based calling convention for in-kernel use.
*/
-long __x64_sys_getcpu(const struct pt_regs *regs);
-long __x64_sys_gettimeofday(const struct pt_regs *regs);
-long __x64_sys_time(const struct pt_regs *regs);
+long __noendbr __x64_sys_getcpu(const struct pt_regs *regs);
+long __noendbr __x64_sys_gettimeofday(const struct pt_regs *regs);
+long __noendbr __x64_sys_time(const struct pt_regs *regs);
#endif /* _ASM_X86_SYSCALL_WRAPPER_H */
next prev parent reply other threads:[~2026-06-19 8:14 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-13 1:45 8aeb879baf12 - significant system call latency regression, bisected "H. Peter Anvin" (Intel)
2026-06-13 8:59 ` Peter Zijlstra
2026-06-13 20:34 ` H. Peter Anvin
2026-06-13 23:52 ` H. Peter Anvin
2026-06-14 1:50 ` H. Peter Anvin
2026-06-14 18:08 ` Xin Li
2026-06-14 18:31 ` H. Peter Anvin
2026-06-15 0:19 ` H. Peter Anvin
2026-06-15 2:07 ` H. Peter Anvin
2026-06-15 3:41 ` Linus Torvalds
2026-06-15 18:30 ` H. Peter Anvin
2026-06-16 7:12 ` Peter Zijlstra
2026-06-16 7:38 ` Peter Zijlstra
2026-06-16 7:53 ` Peter Zijlstra
2026-06-18 23:05 ` H. Peter Anvin
2026-06-19 7:50 ` Peter Zijlstra
2026-06-19 10:22 ` H. Peter Anvin
2026-06-16 8:28 ` Peter Zijlstra
2026-06-16 8:46 ` Linus Torvalds
2026-06-16 9:51 ` Ingo Molnar
2026-06-16 17:44 ` H. Peter Anvin
2026-06-17 9:54 ` Ingo Molnar
2026-06-17 10:05 ` Ingo Molnar
2026-06-17 12:37 ` Peter Zijlstra
2026-06-18 22:40 ` H. Peter Anvin
2026-06-19 1:11 ` H. Peter Anvin
2026-06-19 2:08 ` Linus Torvalds
2026-06-19 2:11 ` Linus Torvalds
2026-06-19 4:32 ` H. Peter Anvin
2026-06-19 7:35 ` Peter Zijlstra
2026-06-19 2:11 ` H. Peter Anvin
2026-06-19 7:31 ` Peter Zijlstra
2026-06-19 8:14 ` Peter Zijlstra [this message]
2026-06-19 10:23 ` H. Peter Anvin
2026-06-19 11:18 ` Peter Zijlstra
2026-06-19 21:53 ` syscall path improvements (was: syscall performance regression, debunked) H. Peter Anvin
2026-06-16 13:53 ` 8aeb879baf12 - significant system call latency regression, bisected David Laight
2026-06-18 23:03 ` H. Peter Anvin
2026-06-14 2:11 ` Calvin Owens
2026-06-14 2:14 ` Calvin Owens
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260619081427.GD49529@noisy.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=bp@alien8.de \
--cc=calvin@wbinvd.org \
--cc=dave.hansen@linux.intel.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=nathan@kernel.org \
--cc=tglx@kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox