From: Peter Zijlstra <peterz@infradead.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>,
tglx@kernel.org, mingo@redhat.com, bp@alien8.de,
Nathan Chancellor <nathan@kernel.org>,
Calvin Owens <calvin@wbinvd.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
x86-ML <x86@kernel.org>, LKML <linux-kernel@vger.kernel.org>
Subject: Re: 8aeb879baf12 - significant system call latency regression, bisected
Date: Fri, 19 Jun 2026 10:14:27 +0200 [thread overview]
Message-ID: <20260619081427.GD49529@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <20260617123718.GM49951@noisy.programming.kicks-ass.net>
On Wed, Jun 17, 2026 at 02:37:18PM +0200, Peter Zijlstra wrote:
> - makes -fno-jump-tables unconditional
> - removes array_index_nospec() from the syscall dispatch
FWIW, this also allows making all SYSCALLs __noendbr, very much
including the 'legacy' sys_call_table :-)
Compile tested with IA32_EMULATION=n and reliably yields:
vmlinux.o: warning: objtool: sys_call_table+0x0: data relocation to !ENDBR: __x64_sys_read+0x0
...
vmlinux.o: warning: objtool: sys_call_table+0xeb0: data relocation to !ENDBR: __x64_sys_listns+0x0
(which is just one little objtool patch away from being fixed)
and boots fine (in kvm).
---
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 598f178102ee..b154a2a20eb2 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -90,17 +90,8 @@ CC_FLAGS_FPU += -mhard-float
endif
ifeq ($(CONFIG_X86_KERNEL_IBT),y)
-#
-# Kernel IBT has S_CET.NOTRACK_EN=0, as such the compilers must not generate
-# NOTRACK prefixes. Current generation compilers unconditionally employ NOTRACK
-# for jump-tables, as such, disable jump-tables for now.
-#
-# (jump-tables are implicitly disabled by RETPOLINE)
-#
-# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104816
-#
-KBUILD_CFLAGS += $(call cc-option,-fcf-protection=branch -fno-jump-tables)
-KBUILD_RUSTFLAGS += -Zcf-protection=branch $(if $(call rustc-min-version,109300),-Cjump-tables=n,-Zno-jump-tables)
+KBUILD_CFLAGS += $(call cc-option,-fcf-protection=branch)
+KBUILD_RUSTFLAGS += -Zcf-protection=branch
else
KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
endif
@@ -173,6 +164,13 @@ endif
KBUILD_RUSTFLAGS += -Ccode-model=kernel
percpu_seg := gs
+
+ # Due to retpolines and cf-protection=branch's implicit NOTRACK usage
+ # for jump-tables, blanket disable jump-tables for all x86_64 builds to
+ # get a consistent behaviour across configurations. This allows
+ # removing some array_index_nospec() usage.
+ KBUILD_CFLAGS += -fno-jump-tables
+ KBUILD_RUSTFLAGS += $(if $(call rustc-min-version,109300),-Cjump-tables=n,-Zno-jump-tables)
endif
ifeq ($(CONFIG_STACKPROTECTOR),y)
@@ -209,15 +207,6 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
ifdef CONFIG_MITIGATION_RETPOLINE
KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
KBUILD_RUSTFLAGS += $(RETPOLINE_RUSTFLAGS)
- # Additionally, avoid generating expensive indirect jumps which
- # are subject to retpolines for small number of switch cases.
- # LLVM turns off jump table generation by default when under
- # retpoline builds, however, gcc does not for x86. This has
- # only been fixed starting from gcc stable version 8.4.0 and
- # onwards, but not for older ones. See gcc bug #86952.
- ifndef CONFIG_CC_IS_CLANG
- KBUILD_CFLAGS += -fno-jump-tables
- endif
endif
ifdef CONFIG_MITIGATION_SLS
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 71f032504e73..7e87947e12be 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -8,9 +8,10 @@
#include <linux/entry-common.h>
#include <linux/nospec.h>
#include <asm/syscall.h>
+#include <asm/ibt.h>
-#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
-#define __SYSCALL_NORETURN(nr, sym) extern long __noreturn __x64_##sym(const struct pt_regs *);
+#define __SYSCALL(nr, sym) extern __noendbr long __x64_##sym(const struct pt_regs *);
+#define __SYSCALL_NORETURN(nr, sym) extern __noendbr long __noreturn __x64_##sym(const struct pt_regs *);
#include <asm/syscalls_64.h>
#ifdef CONFIG_X86_X32_ABI
#include <asm/syscalls_x32.h>
@@ -25,30 +26,47 @@
* kernel/trace/trace_syscalls.c still wants to know the system
* call address.
*/
-#define __SYSCALL(nr, sym) __x64_##sym,
+#define __SYSCALL(nr, sym) (void *)&__x64_##sym,
const sys_call_ptr_t sys_call_table[] = {
#include <asm/syscalls_64.h>
};
#undef __SYSCALL
#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
-long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
+static noinstr long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
{
+ /*
+ * Because of -fno-jump-tables, this compiles into a binary branch tree
+ * rather than a jump-table. As such @nr is not used as an array
+ * index. Additionally, this is an out-of-line function on purpose,
+ * such that all the actual syscall function calls are tail-calls,
+ * returning to our caller for the common bits.
+ */
+ instrumentation_begin();
switch (nr) {
#include <asm/syscalls_64.h>
default: return __x64_sys_ni_syscall(regs);
}
+ instrumentation_end();
}
#ifdef CONFIG_X86_X32_ABI
-long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+static noinstr long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
{
+ instrumentation_begin();
switch (nr) {
#include <asm/syscalls_x32.h>
default: return __x64_sys_ni_syscall(regs);
}
+ instrumentation_end();
+}
+#else
+static __always_inline long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+ return __x64_sys_ni_syscall(regs);
}
#endif
+#undef __SYSCALL
static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
{
@@ -59,7 +77,6 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
unsigned int unr = nr;
if (likely(unr < NR_syscalls)) {
- unr = array_index_nospec(unr, NR_syscalls);
regs->ax = x64_sys_call(regs, unr);
return true;
}
@@ -76,7 +93,6 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
unsigned int xnr = nr - __X32_SYSCALL_BIT;
if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
- xnr = array_index_nospec(xnr, X32_NR_syscalls);
regs->ax = x32_sys_call(regs, xnr);
return true;
}
@@ -84,7 +100,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
}
/* Returns true to return using SYSRET, or false to use IRET */
-__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
+__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
{
nr = syscall_enter_from_user_mode(regs, nr);
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index 7e88705e907f..1639fbc02680 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -7,9 +7,10 @@
#define _ASM_X86_SYSCALL_WRAPPER_H
#include <asm/ptrace.h>
+#include <asm/ibt.h>
-extern long __x64_sys_ni_syscall(const struct pt_regs *regs);
-extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
+extern __noendbr long __x64_sys_ni_syscall(const struct pt_regs *regs);
+extern __noendbr long __ia32_sys_ni_syscall(const struct pt_regs *regs);
/*
* Instead of the generic __SYSCALL_DEFINEx() definition, the x86 version takes
@@ -83,15 +84,15 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
__MAP(x, __SC_TYPE, __VA_ARGS__)) \
#define __SYS_STUB0(abi, name) \
- long __##abi##_##name(const struct pt_regs *regs); \
+ long __noendbr __##abi##_##name(const struct pt_regs *regs); \
ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \
- long __##abi##_##name(const struct pt_regs *regs) \
+ long __noendbr __##abi##_##name(const struct pt_regs *regs) \
__alias(__do_##name);
#define __SYS_STUBx(abi, name, ...) \
- long __##abi##_##name(const struct pt_regs *regs); \
+ long __noendbr __##abi##_##name(const struct pt_regs *regs); \
ALLOW_ERROR_INJECTION(__##abi##_##name, ERRNO); \
- long __##abi##_##name(const struct pt_regs *regs) \
+ long __noendbr __##abi##_##name(const struct pt_regs *regs) \
{ \
return __se_##name(__VA_ARGS__); \
}
@@ -257,8 +258,8 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
* For VSYSCALLS, we need to declare these three syscalls with the new
* pt_regs-based calling convention for in-kernel use.
*/
-long __x64_sys_getcpu(const struct pt_regs *regs);
-long __x64_sys_gettimeofday(const struct pt_regs *regs);
-long __x64_sys_time(const struct pt_regs *regs);
+long __noendbr __x64_sys_getcpu(const struct pt_regs *regs);
+long __noendbr __x64_sys_gettimeofday(const struct pt_regs *regs);
+long __noendbr __x64_sys_time(const struct pt_regs *regs);
#endif /* _ASM_X86_SYSCALL_WRAPPER_H */
next prev parent reply other threads:[~2026-06-19 8:14 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-13 1:45 8aeb879baf12 - significant system call latency regression, bisected "H. Peter Anvin" (Intel)
2026-06-13 8:59 ` Peter Zijlstra
2026-06-13 20:34 ` H. Peter Anvin
2026-06-13 23:52 ` H. Peter Anvin
2026-06-14 1:50 ` H. Peter Anvin
2026-06-14 18:08 ` Xin Li
2026-06-14 18:31 ` H. Peter Anvin
2026-06-15 0:19 ` H. Peter Anvin
2026-06-15 2:07 ` H. Peter Anvin
2026-06-15 3:41 ` Linus Torvalds
2026-06-15 18:30 ` H. Peter Anvin
2026-06-16 7:12 ` Peter Zijlstra
2026-06-16 7:38 ` Peter Zijlstra
2026-06-16 7:53 ` Peter Zijlstra
2026-06-18 23:05 ` H. Peter Anvin
2026-06-19 7:50 ` Peter Zijlstra
2026-06-19 10:22 ` H. Peter Anvin
2026-06-16 8:28 ` Peter Zijlstra
2026-06-16 8:46 ` Linus Torvalds
2026-06-16 9:51 ` Ingo Molnar
2026-06-16 17:44 ` H. Peter Anvin
2026-06-17 9:54 ` Ingo Molnar
2026-06-17 10:05 ` Ingo Molnar
2026-06-17 12:37 ` Peter Zijlstra
2026-06-18 22:40 ` H. Peter Anvin
2026-06-19 1:11 ` H. Peter Anvin
2026-06-19 2:08 ` Linus Torvalds
2026-06-19 2:11 ` Linus Torvalds
2026-06-19 4:32 ` H. Peter Anvin
2026-06-19 7:35 ` Peter Zijlstra
2026-06-19 2:11 ` H. Peter Anvin
2026-06-19 7:31 ` Peter Zijlstra
2026-06-19 8:14 ` Peter Zijlstra [this message]
2026-06-19 10:23 ` H. Peter Anvin
2026-06-19 11:18 ` Peter Zijlstra
2026-06-19 21:53 ` syscall path improvements (was: syscall performance regression, debunked) H. Peter Anvin
2026-06-16 13:53 ` 8aeb879baf12 - significant system call latency regression, bisected David Laight
2026-06-18 23:03 ` H. Peter Anvin
2026-06-14 2:11 ` Calvin Owens
2026-06-14 2:14 ` Calvin Owens
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260619081427.GD49529@noisy.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=bp@alien8.de \
--cc=calvin@wbinvd.org \
--cc=dave.hansen@linux.intel.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=nathan@kernel.org \
--cc=tglx@kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.