From: Brian Gerst <brgerst@gmail.com>
To: linux-kernel@vger.kernel.org, x86@kernel.org
Cc: Ingo Molnar <mingo@kernel.org>, "H . Peter Anvin" <hpa@zytor.com>,
Thomas Gleixner <tglx@linutronix.de>,
Borislav Petkov <bp@alien8.de>, Andy Lutomirski <luto@kernel.org>,
Juergen Gross <jgross@suse.com>,
Boris Ostrovsky <boris.ostrovsky@oracle.com>,
Brian Gerst <brgerst@gmail.com>
Subject: [PATCH 3/5] x86/syscall/64: Move 64-bit syscall dispatch code
Date: Thu, 13 Mar 2025 14:22:34 -0400 [thread overview]
Message-ID: <20250313182236.655724-4-brgerst@gmail.com> (raw)
In-Reply-To: <20250313182236.655724-1-brgerst@gmail.com>
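Move the 64-bit syscall dispatch code (do_syscall_x64(), do_syscall_x32()
and do_syscall_64()) from arch/x86/entry/common.c to
arch/x86/entry/syscall_64.c. Build syscall_64.o with the same flags that
common.o and syscall_32.o already use: ftrace instrumentation removed and
the stack protector disabled.
No functional changes.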
Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
arch/x86/entry/Makefile | 2 +
arch/x86/entry/common.c | 93 --------------------------------
arch/x86/entry/syscall_64.c | 103 +++++++++++++++++++++++++++++++++++-
3 files changed, 103 insertions(+), 95 deletions(-)
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 96a6b86e0a8b..5fd28abfd5a0 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -9,9 +9,11 @@ KCOV_INSTRUMENT := n
CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_syscall_32.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE)
CFLAGS_common.o += -fno-stack-protector
CFLAGS_syscall_32.o += -fno-stack-protector
+CFLAGS_syscall_64.o += -fno-stack-protector
obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o
obj-y += common.o
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 183efabefe57..5bd448c0664f 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -32,99 +32,6 @@
#include <asm/syscall.h>
#include <asm/irq_stack.h>
-#ifdef CONFIG_X86_64
-
-static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
-{
- /*
- * Convert negative numbers to very high and thus out of range
- * numbers for comparisons.
- */
- unsigned int unr = nr;
-
- if (likely(unr < NR_syscalls)) {
- unr = array_index_nospec(unr, NR_syscalls);
- regs->ax = x64_sys_call(regs, unr);
- return true;
- }
- return false;
-}
-
-static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
-{
- /*
- * Adjust the starting offset of the table, and convert numbers
- * < __X32_SYSCALL_BIT to very high and thus out of range
- * numbers for comparisons.
- */
- unsigned int xnr = nr - __X32_SYSCALL_BIT;
-
- if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
- xnr = array_index_nospec(xnr, X32_NR_syscalls);
- regs->ax = x32_sys_call(regs, xnr);
- return true;
- }
- return false;
-}
-
-/* Returns true to return using SYSRET, or false to use IRET */
-__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
-{
- add_random_kstack_offset();
- nr = syscall_enter_from_user_mode(regs, nr);
-
- instrumentation_begin();
-
- if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
- /* Invalid system call, but still a system call. */
- regs->ax = __x64_sys_ni_syscall(regs);
- }
-
- instrumentation_end();
- syscall_exit_to_user_mode(regs);
-
- /*
- * Check that the register state is valid for using SYSRET to exit
- * to userspace. Otherwise use the slower but fully capable IRET
- * exit path.
- */
-
- /* XEN PV guests always use the IRET path */
- if (cpu_feature_enabled(X86_FEATURE_XENPV))
- return false;
-
- /* SYSRET requires RCX == RIP and R11 == EFLAGS */
- if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
- return false;
-
- /* CS and SS must match the values set in MSR_STAR */
- if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
- return false;
-
- /*
- * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
- * in kernel space. This essentially lets the user take over
- * the kernel, since userspace controls RSP.
- *
- * TASK_SIZE_MAX covers all user-accessible addresses other than
- * the deprecated vsyscall page.
- */
- if (unlikely(regs->ip >= TASK_SIZE_MAX))
- return false;
-
- /*
- * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
- * restoring TF results in a trap from userspace immediately after
- * SYSRET.
- */
- if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
- return false;
-
- /* Use SYSRET to exit to userspace */
- return true;
-}
-#endif
-
SYSCALL_DEFINE0(ni_syscall)
{
return -ENOSYS;
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index ba8354424860..9e0ba339013c 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -1,10 +1,19 @@
-// SPDX-License-Identifier: GPL-2.0
-/* System call table for x86-64. */
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * 64-bit system call dispatch
+ *
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * Based on asm and ptrace code by many authors. The code here originated
+ * in ptrace.c and signal.c.
+ */
#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
#include <linux/syscalls.h>
+#include <linux/entry-common.h>
+#include <linux/nospec.h>
#include <asm/syscall.h>
#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
@@ -34,3 +43,93 @@ long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
default: return __x64_sys_ni_syscall(regs);
}
};
+
+static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
+{
+ /*
+ * Convert negative numbers to very high and thus out of range
+ * numbers for comparisons.
+ */
+ unsigned int unr = nr;
+
+ if (likely(unr < NR_syscalls)) {
+ unr = array_index_nospec(unr, NR_syscalls);
+ regs->ax = x64_sys_call(regs, unr);
+ return true;
+ }
+ return false;
+}
+
+static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
+{
+ /*
+ * Adjust the starting offset of the table, and convert numbers
+ * < __X32_SYSCALL_BIT to very high and thus out of range
+ * numbers for comparisons.
+ */
+ unsigned int xnr = nr - __X32_SYSCALL_BIT;
+
+ if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
+ xnr = array_index_nospec(xnr, X32_NR_syscalls);
+ regs->ax = x32_sys_call(regs, xnr);
+ return true;
+ }
+ return false;
+}
+
+/* Returns true to return using SYSRET, or false to use IRET */
+__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
+{
+ add_random_kstack_offset();
+ nr = syscall_enter_from_user_mode(regs, nr);
+
+ instrumentation_begin();
+
+ if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
+ /* Invalid system call, but still a system call. */
+ regs->ax = __x64_sys_ni_syscall(regs);
+ }
+
+ instrumentation_end();
+ syscall_exit_to_user_mode(regs);
+
+ /*
+ * Check that the register state is valid for using SYSRET to exit
+ * to userspace. Otherwise use the slower but fully capable IRET
+ * exit path.
+ */
+
+ /* XEN PV guests always use the IRET path */
+ if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ return false;
+
+ /* SYSRET requires RCX == RIP and R11 == EFLAGS */
+ if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
+ return false;
+
+ /* CS and SS must match the values set in MSR_STAR */
+ if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
+ return false;
+
+ /*
+ * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+ * in kernel space. This essentially lets the user take over
+ * the kernel, since userspace controls RSP.
+ *
+ * TASK_SIZE_MAX covers all user-accessible addresses other than
+ * the deprecated vsyscall page.
+ */
+ if (unlikely(regs->ip >= TASK_SIZE_MAX))
+ return false;
+
+ /*
+ * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
+ * restoring TF results in a trap from userspace immediately after
+ * SYSRET.
+ */
+ if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
+ return false;
+
+ /* Use SYSRET to exit to userspace */
+ return true;
+}
--
2.48.1
next prev parent reply other threads:[~2025-03-13 18:22 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-13 18:22 [PATCH 0/5] x86/entry: Break up common.c Brian Gerst
2025-03-13 18:22 ` [PATCH 1/5] x86/xen: Move Xen upcall handler Brian Gerst
2025-03-14 7:37 ` Juergen Gross
2025-03-14 9:47 ` [tip: x86/cpu] x86/xen: Move Xen upcall handler to Xen specific code files tip-bot2 for Brian Gerst
2025-03-14 9:53 ` Jürgen Groß
2025-03-14 10:08 ` Ingo Molnar
2025-03-14 10:14 ` Jürgen Groß
2025-03-13 18:22 ` [PATCH 2/5] x86/syscall/32: Move 32-bit syscall dispatch code Brian Gerst
2025-03-13 23:44 ` Sohil Mehta
2025-03-14 1:25 ` Brian Gerst
2025-03-14 9:27 ` Ingo Molnar
2025-03-14 9:46 ` [tip: x86/cpu] x86/syscall/32: Move the 32-bit syscall dispatch code to arch/x86/entry/syscall_32.c tip-bot2 for Brian Gerst
2025-03-13 18:22 ` Brian Gerst [this message]
2025-03-14 9:46 ` [tip: x86/cpu] x86/syscall/64: Move the 64-bit syscall dispatch code to arch/x86/entry/syscall_64.c tip-bot2 for Brian Gerst
2025-03-13 18:22 ` [PATCH 4/5] x86/syscall/x32: Move x32 syscall table Brian Gerst
2025-03-13 23:47 ` Sohil Mehta
2025-03-14 9:25 ` Ingo Molnar
2025-03-14 9:34 ` Ingo Molnar
2025-03-14 16:02 ` Sohil Mehta
2025-03-14 9:46 ` [tip: x86/cpu] x86/syscall/x32: Move the x32 syscall table to arch/x86/entry/syscall_64.c tip-bot2 for Brian Gerst
2025-03-13 18:22 ` [PATCH 5/5] x86/syscall: Move sys_ni_syscall() Brian Gerst
2025-03-14 9:46 ` [tip: x86/cpu] x86/syscall: Move sys_ni_syscall() to arch/x86/kernel/process.c tip-bot2 for Brian Gerst
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250313182236.655724-4-brgerst@gmail.com \
--to=brgerst@gmail.com \
--cc=boris.ostrovsky@oracle.com \
--cc=bp@alien8.de \
--cc=hpa@zytor.com \
--cc=jgross@suse.com \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mingo@kernel.org \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.