From: Brian Gerst <brgerst@gmail.com>
To: linux-kernel@vger.kernel.org, x86@kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>,
Borislav Petkov <bp@alien8.de>, "H . Peter Anvin" <hpa@zytor.com>,
Andy Lutomirski <luto@kernel.org>,
Brian Gerst <brgerst@gmail.com>
Subject: [PATCH 2/6] x86/entry/64: Convert SYSRET validation tests to C
Date: Tue, 18 Jul 2023 09:44:42 -0400 [thread overview]
Message-ID: <20230718134446.168654-3-brgerst@gmail.com> (raw)
In-Reply-To: <20230718134446.168654-1-brgerst@gmail.com>
Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
arch/x86/entry/common.c | 50 ++++++++++++++++++++++++++++++-
arch/x86/entry/entry_64.S | 55 ++--------------------------------
arch/x86/include/asm/syscall.h | 2 +-
3 files changed, 52 insertions(+), 55 deletions(-)
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 6c2826417b33..afe79c3f1c5b 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -70,8 +70,12 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
return false;
}
-__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
+/* Returns true to return using SYSRET, or false to use IRET */
+__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
{
+ long rip;
+ unsigned int shift_rip;
+
add_random_kstack_offset();
nr = syscall_enter_from_user_mode(regs, nr);
@@ -84,6 +88,50 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
instrumentation_end();
syscall_exit_to_user_mode(regs);
+
+ /*
+ * Check that the register state is valid for using SYSRET to exit
+ * to userspace. Otherwise use the slower but fully capable IRET
+ * exit path.
+ */
+
+ /* XEN PV guests always use IRET path */
+ if (cpu_feature_enabled(X86_FEATURE_XENPV))
+ return false;
+
+ /* SYSRET requires RCX == RIP and R11 == EFLAGS */
+ if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
+ return false;
+
+ /* CS and SS must match the values set in MSR_STAR */
+ if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
+ return false;
+
+ /*
+ * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+ * in kernel space. This essentially lets the user take over
+ * the kernel, since userspace controls RSP.
+ *
+ * Change top bits to match most significant bit (47th or 56th bit
+ * depending on paging mode) in the address.
+ */
+ shift_rip = (64 - __VIRTUAL_MASK_SHIFT + 1);
+ rip = (long) regs->ip;
+ rip <<= shift_rip;
+ rip >>= shift_rip;
+ if (unlikely((unsigned long) rip != regs->ip))
+ return false;
+
+ /*
+ * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
+ * restoring TF results in a trap from userspace immediately after
+ * SYSRET.
+ */
+ if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
+ return false;
+
+ /* Use SYSRET to exit to userspace */
+ return true;
}
#endif
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c01776a51545..b1288e22cae8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -123,60 +123,9 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
* Try to use SYSRET instead of IRET if we're returning to
* a completely clean 64-bit userspace context. If we're not,
* go to the slow exit path.
- * In the Xen PV case we must use iret anyway.
*/
-
- ALTERNATIVE "", "jmp swapgs_restore_regs_and_return_to_usermode", \
- X86_FEATURE_XENPV
-
- movq RCX(%rsp), %rcx
- movq RIP(%rsp), %r11
-
- cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
- jne swapgs_restore_regs_and_return_to_usermode
-
- /*
- * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
- * in kernel space. This essentially lets the user take over
- * the kernel, since userspace controls RSP.
- *
- * If width of "canonical tail" ever becomes variable, this will need
- * to be updated to remain correct on both old and new CPUs.
- *
- * Change top bits to match most significant bit (47th or 56th bit
- * depending on paging mode) in the address.
- */
-#ifdef CONFIG_X86_5LEVEL
- ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
- "shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
-#else
- shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
- sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
-#endif
-
- /* If this changed %rcx, it was not canonical */
- cmpq %rcx, %r11
- jne swapgs_restore_regs_and_return_to_usermode
-
- cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
- jne swapgs_restore_regs_and_return_to_usermode
-
- movq R11(%rsp), %r11
- cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
- jne swapgs_restore_regs_and_return_to_usermode
-
- /*
- * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
- * restoring TF results in a trap from userspace immediately after
- * SYSRET.
- */
- testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
- jnz swapgs_restore_regs_and_return_to_usermode
-
- /* nothing to check for RSP */
-
- cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
- jne swapgs_restore_regs_and_return_to_usermode
+ testb %al, %al
+ jz swapgs_restore_regs_and_return_to_usermode
/*
* We win! This label is here just for ease of understanding
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 4fb36fba4b5a..be6c5515e0b9 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -126,7 +126,7 @@ static inline int syscall_get_arch(struct task_struct *task)
? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
}
-void do_syscall_64(struct pt_regs *regs, int nr);
+bool do_syscall_64(struct pt_regs *regs, int nr);
#endif /* CONFIG_X86_32 */
--
2.41.0
next prev parent reply other threads:[~2023-07-18 13:45 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-07-18 13:44 [PATCH 0/6] x86: Clean up fast syscall return validation Brian Gerst
2023-07-18 13:44 ` [PATCH 1/6] x86/entry/64: Remove obsolete comment on tracing vs. SYSRET Brian Gerst
2023-07-18 13:44 ` Brian Gerst [this message]
2023-07-18 14:16 ` [PATCH 2/6] x86/entry/64: Convert SYSRET validation tests to C Mika Penttilä
2023-07-18 14:25 ` Brian Gerst
2023-07-18 14:49 ` Mika Penttilä
2023-07-18 15:21 ` Brian Gerst
2023-07-18 15:46 ` Brian Gerst
2023-07-18 13:44 ` [PATCH 3/6] x86/entry/compat: Combine return value test from syscall handler Brian Gerst
2023-07-18 13:44 ` [PATCH 4/6] x86/entry/32: Convert do_fast_syscall_32() to bool return type Brian Gerst
2023-07-18 13:44 ` [PATCH 5/6] x86/entry/32: Remove SEP test for SYSEXIT Brian Gerst
2023-07-18 13:44 ` [PATCH 6/6] x86/entry/32: Clean up syscall fast exit tests Brian Gerst
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230718134446.168654-3-brgerst@gmail.com \
--to=brgerst@gmail.com \
--cc=bp@alien8.de \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@kernel.org \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.