From: Ingo Molnar <mingo@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: Dave Hansen <dave.hansen@linux.intel.com>,
Andy Lutomirski <luto@amacapital.net>,
Thomas Gleixner <tglx@linutronix.de>,
"H . Peter Anvin" <hpa@zytor.com>,
Peter Zijlstra <peterz@infradead.org>,
Borislav Petkov <bp@alien8.de>,
Linus Torvalds <torvalds@linux-foundation.org>
Subject: [PATCH 16/21] x86/entry/64: Use a per-CPU trampoline stack for IDT entries
Date: Mon, 27 Nov 2017 11:45:24 +0100 [thread overview]
Message-ID: <20171127104529.12435-17-mingo@kernel.org> (raw)
In-Reply-To: <20171127104529.12435-1-mingo@kernel.org>
From: Andy Lutomirski <luto@kernel.org>
Historically, IDT entries from usermode have always gone directly
to the running task's kernel stack. Rearrange it so that we enter on
a per-CPU trampoline stack and then manually switch to the task's stack.
This touches a couple of extra cachelines, but it gives us a chance
to run some code before we touch the kernel stack.
The asm isn't exactly beautiful, but I think that fully refactoring
it can wait.
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Link: https://lkml.kernel.org/r/fa3958723a1a85baeaf309c735b775841205800e.1511497875.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
arch/x86/entry/entry_64.S | 67 ++++++++++++++++++++++++++++++----------
arch/x86/entry/entry_64_compat.S | 5 ++-
arch/x86/include/asm/switch_to.h | 3 +-
arch/x86/include/asm/traps.h | 1 -
arch/x86/kernel/cpu/common.c | 6 ++--
arch/x86/kernel/traps.c | 12 +++----
6 files changed, 65 insertions(+), 29 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f81d50d7ceac..7d47199f405f 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -563,6 +563,13 @@ END(irq_entries_start)
/* 0(%rsp): ~(interrupt number) */
.macro interrupt func
cld
+
+ testb $3, CS-ORIG_RAX(%rsp)
+ jz 1f
+ SWAPGS
+ call switch_to_thread_stack
+1:
+
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
SAVE_EXTRA_REGS
@@ -572,12 +579,8 @@ END(irq_entries_start)
jz 1f
/*
- * IRQ from user mode. Switch to kernel gsbase and inform context
- * tracking that we're in kernel mode.
- */
- SWAPGS
-
- /*
+ * IRQ from user mode.
+ *
* We need to tell lockdep that IRQs are off. We can't do this until
* we fix gsbase, and we should do it before enter_from_user_mode
* (which can take locks). Since TRACE_IRQS_OFF idempotent,
@@ -831,6 +834,32 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
*/
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
+/*
+ * Switch to the thread stack. This is called with the IRET frame and
+ * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
+ * space has not been allocated for them.)
+ */
+ENTRY(switch_to_thread_stack)
+ UNWIND_HINT_FUNC
+
+ pushq %rdi
+ movq %rsp, %rdi
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+ UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
+
+ pushq 7*8(%rdi) /* regs->ss */
+ pushq 6*8(%rdi) /* regs->rsp */
+ pushq 5*8(%rdi) /* regs->eflags */
+ pushq 4*8(%rdi) /* regs->cs */
+ pushq 3*8(%rdi) /* regs->ip */
+ pushq 2*8(%rdi) /* regs->orig_ax */
+ pushq 8(%rdi) /* return address */
+ UNWIND_HINT_FUNC
+
+ movq (%rdi), %rdi
+ ret
+END(switch_to_thread_stack)
+
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
@@ -848,11 +877,12 @@ ENTRY(\sym)
ALLOC_PT_GPREGS_ON_STACK
- .if \paranoid
- .if \paranoid == 1
+ .if \paranoid < 2
testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
- jnz 1f
+ jnz .Lfrom_usermode_switch_stack_\@
.endif
+
+ .if \paranoid
call paranoid_entry
.else
call error_entry
@@ -894,20 +924,15 @@ ENTRY(\sym)
jmp error_exit
.endif
- .if \paranoid == 1
+ .if \paranoid < 2
/*
- * Paranoid entry from userspace. Switch stacks and treat it
+ * Entry from userspace. Switch stacks and treat it
* as a normal entry. This means that paranoid handlers
* run in real process context if user_mode(regs).
*/
-1:
+.Lfrom_usermode_switch_stack_\@:
call error_entry
-
- movq %rsp, %rdi /* pt_regs pointer */
- call sync_regs
- movq %rax, %rsp /* switch stack */
-
movq %rsp, %rdi /* pt_regs pointer */
.if \has_error_code
@@ -1170,6 +1195,14 @@ ENTRY(error_entry)
SWAPGS
.Lerror_entry_from_usermode_after_swapgs:
+ /* Put us onto the real thread stack. */
+ popq %r12 /* save return addr in %12 */
+ movq %rsp, %rdi /* arg0 = pt_regs pointer */
+ call sync_regs
+ movq %rax, %rsp /* switch stack */
+ ENCODE_FRAME_POINTER
+ pushq %r12
+
/*
* We need to tell lockdep that IRQs are off. We can't do this until
* we fix gsbase, and we should do it before enter_from_user_mode
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index dcc6987f9bae..95ad40eb7eff 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat)
*/
movl %eax, %eax
- /* Construct struct pt_regs on stack (iret frame is already on stack) */
pushq %rax /* pt_regs->orig_ax */
+
+ /* switch to thread stack expects orig_ax to be pushed */
+ call switch_to_thread_stack
+
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8c6bd6863db9..fab453ad2460 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -90,10 +90,9 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
/* This is used when switching tasks or entering/exiting vm86 mode. */
static inline void update_sp0(struct task_struct *task)
{
+ /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
#ifdef CONFIG_X86_32
load_sp0(task->thread.sp0);
-#else
- load_sp0(task_top_of_stack(task));
#endif
}
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 1fadd310ff68..31051f35cbb7 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
dotraplinkage void do_stack_segment(struct pt_regs *, long);
#ifdef CONFIG_X86_64
dotraplinkage void do_double_fault(struct pt_regs *, long);
-asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
#endif
dotraplinkage void do_general_protection(struct pt_regs *, long);
dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c67742df569a..7c82a8a8bfda 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1645,11 +1645,13 @@ void cpu_init(void)
setup_cpu_entry_area(cpu);
/*
- * Initialize the TSS. Don't bother initializing sp0, as the initial
- * task never enters user mode.
+ * Initialize the TSS. sp0 points to the entry trampoline stack
+ * regardless of what task is running.
*/
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
load_TR_desc();
+ load_sp0((unsigned long)&get_cpu_entry_area(cpu)->tss +
+ offsetofend(struct tss_struct, SYSENTER_stack));
load_mm_ldt(&init_mm);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7d74d84896c6..450688c75b3b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -631,7 +631,7 @@ NOKPROBE_SYMBOL(do_int3);
*/
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
{
- struct pt_regs *regs = task_pt_regs(current);
+ struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
*regs = *eregs;
return regs;
}
@@ -648,13 +648,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
/*
* This is called from entry_64.S early in handling a fault
* caused by a bad iret to user mode. To handle the fault
- * correctly, we want move our stack frame to task_pt_regs
- * and we want to pretend that the exception came from the
- * iret target.
+ * correctly, we want to move our stack frame to where it would
+ * be had we entered directly on the entry stack (rather than
+ * just below the IRET frame) and we want to pretend that the
+ * exception came from the IRET target.
*/
struct bad_iret_stack *new_stack =
- container_of(task_pt_regs(current),
- struct bad_iret_stack, regs);
+ (struct bad_iret_stack *)this_cpu_read(cpu_tss.x86_tss.sp0) - 1;
/* Copy the IRET target to the new stack. */
memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
--
2.14.1
next prev parent reply other threads:[~2017-11-27 10:47 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-11-27 10:45 [PATCH 00/21] Preparatory patches for x86 KAISER support Ingo Molnar
2017-11-27 10:45 ` [PATCH 01/21] x86/unwinder/orc: Don't bail on stack overflow Ingo Molnar
2017-11-27 14:47 ` Borislav Petkov
2017-11-27 10:45 ` [PATCH 02/21] x86/unwinder: Handle stack overflows more gracefully Ingo Molnar
2017-11-27 17:36 ` Borislav Petkov
2017-11-28 4:55 ` Josh Poimboeuf
2017-11-27 10:45 ` [PATCH 03/21] x86/irq: Remove an old outdated comment about context tracking races Ingo Molnar
2017-11-27 10:45 ` [PATCH 04/21] x86/irq/64: Print the offending IP in the stack overflow warning Ingo Molnar
2017-11-27 10:45 ` [PATCH 05/21] x86/entry/64: Allocate and enable the SYSENTER stack Ingo Molnar
2017-11-27 10:45 ` [PATCH 06/21] x86/dumpstack: Add get_stack_info() support for " Ingo Molnar
2017-11-27 10:45 ` [PATCH 07/21] x86/entry/gdt: Put per-CPU GDT remaps in ascending order Ingo Molnar
2017-11-27 10:45 ` [PATCH 08/21] x86/mm/fixmap: Generalize the GDT fixmap mechanism, introduce 'struct cpu_entry_area' Ingo Molnar
2017-11-27 10:45 ` [PATCH 09/21] x86/kasan/64: Teach KASAN about the cpu_entry_area Ingo Molnar
2017-11-27 10:45 ` [PATCH 10/21] x86/entry: Fix assumptions that the HW TSS is at the beginning of cpu_tss Ingo Molnar
2017-11-27 10:45 ` [PATCH 11/21] x86/dumpstack: Handle stack overflow on all stacks Ingo Molnar
2017-11-27 19:26 ` Linus Torvalds
2017-11-28 4:29 ` Josh Poimboeuf
2017-11-28 5:29 ` Andy Lutomirski
2017-11-28 18:15 ` Josh Poimboeuf
2017-11-27 10:45 ` [PATCH 12/21] x86/entry: Move SYSENTER_stack to the beginning of struct tss_struct Ingo Molnar
2017-11-27 10:45 ` [PATCH 13/21] x86/entry: Remap the TSS into the CPU entry area Ingo Molnar
2017-11-27 10:45 ` [PATCH 14/21] x86/entry/64: Separate cpu_current_top_of_stack from TSS.sp0 Ingo Molnar
2017-11-27 10:45 ` [PATCH 15/21] x86/espfix/64: Stop assuming that pt_regs is on the entry stack Ingo Molnar
2017-11-27 10:45 ` Ingo Molnar [this message]
2017-12-01 17:06 ` [PATCH 16/21] x86/entry/64: Use a per-CPU trampoline stack for IDT entries Dave Hansen
2017-12-01 17:44 ` Andy Lutomirski
2017-12-01 21:21 ` Dave Hansen
2017-12-01 21:59 ` Andy Lutomirski
2017-12-02 6:41 ` Kevin Easton
2017-11-27 10:45 ` [PATCH 17/21] x86/entry/64: Return to userspace from the trampoline stack Ingo Molnar
2017-11-27 10:45 ` [PATCH 18/21] x86/entry/64: Create a per-CPU SYSCALL entry trampoline Ingo Molnar
2017-11-27 10:45 ` [PATCH 19/21] x86/entry/64: Move the IST stacks into 'struct cpu_entry_area' Ingo Molnar
2017-11-27 10:45 ` [PATCH 20/21] x86/entry/64: Remove the SYSENTER stack canary Ingo Molnar
2017-11-27 10:45 ` [PATCH 21/21] x86/entry: Clean up the SYSENTER_stack code Ingo Molnar
2017-12-01 17:59 ` Borislav Petkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171127104529.12435-17-mingo@kernel.org \
--to=mingo@kernel.org \
--cc=bp@alien8.de \
--cc=dave.hansen@linux.intel.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@amacapital.net \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.