From: Brian Gerst <brgerst@gmail.com>
To: x86@kernel.org, linux-kernel@vger.kernel.org
Cc: Ingo Molnar <mingo@kernel.org>, "H. Peter Anvin" <hpa@zytor.com>,
Denys Vlasenko <dvlasenk@redhat.com>,
Andy Lutomirski <luto@amacapital.net>,
Borislav Petkov <bp@suse.de>,
Thomas Gleixner <tglx@linutronix.de>
Subject: [PATCH 3/4] x86: Rewrite switch_to() code
Date: Sat, 21 May 2016 12:04:50 -0400 [thread overview]
Message-ID: <1463846691-18498-4-git-send-email-brgerst@gmail.com> (raw)
In-Reply-To: <1463846691-18498-1-git-send-email-brgerst@gmail.com>
Move the low-level context switch code to an out-of-line asm stub instead of
using complex inline asm. This allows constructing a new stack frame for the
child process to make it seamlessly flow to ret_from_fork without an extra
test and branch in switch_to(). It also improves code generation for
__schedule() by using the C calling convention instead of clobbering all
registers.
Signed-off-by: Brian Gerst <brgerst@gmail.com>
---
arch/x86/entry/entry_32.S | 38 ++++++++++
arch/x86/entry/entry_64.S | 42 +++++++++++-
arch/x86/include/asm/processor.h | 3 -
arch/x86/include/asm/switch_to.h | 137 ++++++-------------------------------
arch/x86/include/asm/thread_info.h | 2 -
arch/x86/kernel/asm-offsets.c | 6 ++
arch/x86/kernel/asm-offsets_32.c | 5 ++
arch/x86/kernel/asm-offsets_64.c | 5 ++
arch/x86/kernel/process_32.c | 8 ++-
arch/x86/kernel/process_64.c | 7 +-
arch/x86/kernel/smpboot.c | 1 -
11 files changed, 124 insertions(+), 130 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index ee6fea0..05e5340 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -204,6 +204,44 @@
POP_GS_EX
.endm
+/*
+ * Switch kernel stacks from the prev task to the next task, then
+ * tail-jump to __switch_to. Because this is a jump and not a call,
+ * the return from __switch_to consumes the return address that sits
+ * just above the saved registers on the next task's stack (ret_addr
+ * in struct fork_frame for a newly forked task).
+ *
+ * %eax: prev task
+ * %edx: next task
+ */
+ENTRY(__switch_to_asm)
+ /*
+ * Save callee-saved registers.
+ * The resulting stack layout must match struct fork_frame, because
+ * copy_thread_tls() points thread.sp of a new task at such a frame
+ * and the pops below consume it.
+ * The frame pointer must be pushed last for get_wchan.
+ */
+ pushl %ebx
+ pushl %edi
+ pushl %esi
+ pushl %ebp
+
+ /*
+ * Switch stacks: store the current stack pointer in prev's
+ * thread.sp and load the one saved in next's thread.sp.
+ */
+ movl %esp, TASK_threadsp(%eax)
+ movl TASK_threadsp(%edx), %esp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+ movl TASK_stack_canary(%edx), %ebx
+ movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
+#endif
+
+ /*
+ * Restore callee-saved registers, in the reverse of the push order.
+ * This also reloads %ebx, which the stack canary update above uses
+ * as a scratch register.
+ */
+ popl %ebp
+ popl %esi
+ popl %edi
+ popl %ebx
+
+ jmp __switch_to
+END(__switch_to_asm)
+
+/*
+ * A newly forked process directly context switches into this address.
+ *
+ * eax: prev task we switched from
+ */
ENTRY(ret_from_fork)
pushl %eax
call schedule_tail
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ab9f8c8..0542ad1 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -365,13 +365,49 @@ END(ptregs_\func)
#include <asm/syscalls_64.h>
/*
+ * Switch kernel stacks from the prev task to the next task, then
+ * tail-jump to __switch_to. Because this is a jump and not a call,
+ * the return from __switch_to consumes the return address that sits
+ * just above the saved registers on the next task's stack (ret_addr
+ * in struct fork_frame for a newly forked task).
+ *
+ * %rdi: prev task
+ * %rsi: next task
+ */
+ENTRY(__switch_to_asm)
+ /*
+ * Save callee-saved registers.
+ * The resulting stack layout must match struct fork_frame, because
+ * copy_thread_tls() points thread.sp of a new task at such a frame
+ * and the pops below consume it.
+ * The frame pointer must be pushed last for get_wchan.
+ */
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbp
+
+ /*
+ * Switch stacks: store the current stack pointer in prev's
+ * thread.sp and load the one saved in next's thread.sp.
+ */
+ movq %rsp, TASK_threadsp(%rdi)
+ movq TASK_threadsp(%rsi), %rsp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+ movq TASK_stack_canary(%rsi), %rbx
+ movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+#endif
+
+ /*
+ * Restore callee-saved registers, in the reverse of the push order.
+ * This also reloads %rbx, which the stack canary update above uses
+ * as a scratch register.
+ */
+ popq %rbp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+
+ jmp __switch_to
+END(__switch_to_asm)
+
+/*
* A newly forked process directly context switches into this address.
*
- * rdi: prev task we switched from
+ * rax: prev task we switched from
*/
ENTRY(ret_from_fork)
- LOCK ; btr $TIF_FORK, TI_flags(%r8)
-
+ movq %rax, %rdi
call schedule_tail /* rdi: 'prev' task parameter */
testb $3, CS(%rsp) /* from kernel_thread? */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 62c6cc3..d3c2598 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -384,9 +384,6 @@ struct thread_struct {
unsigned short fsindex;
unsigned short gsindex;
#endif
-#ifdef CONFIG_X86_32
- unsigned long ip;
-#endif
#ifdef CONFIG_X86_64
unsigned long fsbase;
unsigned long gsbase;
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8f321a1..b6c9e0c 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -2,130 +2,35 @@
#define _ASM_X86_SWITCH_TO_H
struct task_struct; /* one of the stranger aspects of C forward declarations */
+
+struct task_struct *__switch_to_asm(struct task_struct *prev,
+ struct task_struct *next);
+
__visible struct task_struct *__switch_to(struct task_struct *prev,
- struct task_struct *next);
+ struct task_struct *next);
struct tss_struct;
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss);
-#ifdef CONFIG_X86_32
-
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __switch_canary \
- "movl %P[task_canary](%[next]), %%ebx\n\t" \
- "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
-#define __switch_canary_oparam \
- , [stack_canary] "=m" (stack_canary.canary)
-#define __switch_canary_iparam \
- , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
-#else /* CC_STACKPROTECTOR */
-#define __switch_canary
-#define __switch_canary_oparam
-#define __switch_canary_iparam
-#endif /* CC_STACKPROTECTOR */
+struct fork_frame { /* stack frame thread.sp points at for a new task; mirrors __switch_to_asm */
+ unsigned long bp; /* lowest address, popped first; zeroed by copy_thread_tls() */
+#ifdef CONFIG_X86_64
+ unsigned long r15;
+ unsigned long r14;
+ unsigned long r13;
+ unsigned long r12;
+#else
+ unsigned long si;
+ unsigned long di;
+#endif
+ unsigned long bx; /* popped last by __switch_to_asm */
+ unsigned long ret_addr; /* set by copy_thread_tls(), e.g. to ret_from_fork */
+ struct pt_regs regs; /* the task's pt_regs, as returned by task_pt_regs() */
+};
-/*
- * Saving eflags is important. It switches not only IOPL between tasks,
- * it also protects other tasks from NT leaking through sysenter etc.
- */
#define switch_to(prev, next, last) \
do { \
- /* \
- * Context-switching clobbers all registers, so we clobber \
- * them explicitly, via unused output variables. \
- * (EAX and EBP is not listed because EBP is saved/restored \
- * explicitly for wchan access and EAX is the return value of \
- * __switch_to()) \
- */ \
- unsigned long ebx, ecx, edx, esi, edi; \
- \
- asm volatile("pushl %%ebp\n\t" /* save EBP */ \
- "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \
- "movl %[next_sp],%%esp\n\t" /* restore ESP */ \
- "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
- "pushl %[next_ip]\n\t" /* restore EIP */ \
- __switch_canary \
- "jmp __switch_to\n" /* regparm call */ \
- "1:\t" \
- "popl %%ebp\n\t" /* restore EBP */ \
- \
- /* output parameters */ \
- : [prev_sp] "=m" (prev->thread.sp), \
- [prev_ip] "=m" (prev->thread.ip), \
- "=a" (last), \
- \
- /* clobbered output registers: */ \
- "=b" (ebx), "=c" (ecx), "=d" (edx), \
- "=S" (esi), "=D" (edi) \
- \
- __switch_canary_oparam \
- \
- /* input parameters: */ \
- : [next_sp] "m" (next->thread.sp), \
- [next_ip] "m" (next->thread.ip), \
- \
- /* regparm parameters for __switch_to(): */ \
- [prev] "a" (prev), \
- [next] "d" (next) \
- \
- __switch_canary_iparam \
- \
- : /* reloaded segment registers */ \
- "memory"); \
+ ((last) = __switch_to_asm((prev), (next))); \
} while (0)
-#else /* CONFIG_X86_32 */
-
-/* frame pointer must be last for get_wchan */
-#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t"
-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t"
-
-#define __EXTRA_CLOBBER \
- , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
- "r12", "r13", "r14", "r15", "flags"
-
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __switch_canary \
- "movq %P[task_canary](%%rsi),%%r8\n\t" \
- "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
-#define __switch_canary_oparam \
- , [gs_canary] "=m" (irq_stack_union.stack_canary)
-#define __switch_canary_iparam \
- , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
-#else /* CC_STACKPROTECTOR */
-#define __switch_canary
-#define __switch_canary_oparam
-#define __switch_canary_iparam
-#endif /* CC_STACKPROTECTOR */
-
-/*
- * There is no need to save or restore flags, because flags are always
- * clean in kernel mode, with the possible exception of IOPL. Kernel IOPL
- * has no effect.
- */
-#define switch_to(prev, next, last) \
- asm volatile(SAVE_CONTEXT \
- "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
- "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
- "call __switch_to\n\t" \
- "movq "__percpu_arg([current_task])",%%rsi\n\t" \
- __switch_canary \
- "movq %P[thread_info](%%rsi),%%r8\n\t" \
- "movq %%rax,%%rdi\n\t" \
- "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
- "jnz ret_from_fork\n\t" \
- RESTORE_CONTEXT \
- : "=a" (last) \
- __switch_canary_oparam \
- : [next] "S" (next), [prev] "D" (prev), \
- [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
- [ti_flags] "i" (offsetof(struct thread_info, flags)), \
- [_tif_fork] "i" (_TIF_FORK), \
- [thread_info] "i" (offsetof(struct task_struct, stack)), \
- [current_task] "m" (current_task) \
- __switch_canary_iparam \
- : "memory", "cc" __EXTRA_CLOBBER)
-
-#endif /* CONFIG_X86_32 */
-
#endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 30c133a..20d56ec 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -99,7 +99,6 @@ struct thread_info {
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */
-#define TIF_FORK 18 /* ret_from_fork */
#define TIF_NOHZ 19 /* in adaptive nohz mode */
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
@@ -123,7 +122,6 @@ struct thread_info {
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
-#define _TIF_FORK (1 << TIF_FORK)
#define _TIF_NOHZ (1 << TIF_NOHZ)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 674134e..ec41c79 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -29,6 +29,12 @@
void common(void) {
BLANK();
+ OFFSET(TASK_threadsp, task_struct, thread.sp);
+#ifdef CONFIG_CC_STACKPROTECTOR
+ OFFSET(TASK_stack_canary, task_struct, stack_canary);
+#endif
+
+ BLANK();
OFFSET(TI_flags, thread_info, flags);
OFFSET(TI_status, thread_info, status);
OFFSET(TI_addr_limit, thread_info, addr_limit);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index ecdc1d2..880aa09 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -57,6 +57,11 @@ void foo(void)
/* Size of SYSENTER_stack */
DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+#ifdef CONFIG_CC_STACKPROTECTOR
+ BLANK();
+ OFFSET(stack_canary_offset, stack_canary, canary);
+#endif
+
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index d875f97..210927e 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -56,6 +56,11 @@ int main(void)
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
BLANK();
+#ifdef CONFIG_CC_STACKPROTECTOR
+ DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
+ BLANK();
+#endif
+
DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
DEFINE(NR_syscalls, sizeof(syscalls_64));
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 9f95091..0ba6fdf 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -133,17 +133,19 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
unsigned long arg, struct task_struct *p, unsigned long tls)
{
struct pt_regs *childregs = task_pt_regs(p);
+ struct fork_frame *frame = container_of(childregs, struct fork_frame, regs);
struct task_struct *tsk;
int err;
- p->thread.sp = (unsigned long) childregs;
+ frame->bp = 0;
+ p->thread.sp = (unsigned long) frame;
p->thread.sp0 = (unsigned long) (childregs+1);
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
- p->thread.ip = (unsigned long) ret_from_kernel_thread;
+ frame->ret_addr = (unsigned long) ret_from_kernel_thread;
task_user_gs(p) = __KERNEL_STACK_CANARY;
childregs->ds = __USER_DS;
childregs->es = __USER_DS;
@@ -161,7 +163,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
if (sp)
childregs->sp = sp;
- p->thread.ip = (unsigned long) ret_from_fork;
+ frame->ret_addr = (unsigned long) ret_from_fork;
task_user_gs(p) = get_user_gs(current_pt_regs());
p->thread.io_bitmap_ptr = NULL;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6e789ca..9fab915 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -142,11 +142,14 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
int err;
struct pt_regs *childregs;
struct task_struct *me = current;
+ struct fork_frame *frame;
p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
childregs = task_pt_regs(p);
- p->thread.sp = (unsigned long) childregs;
- set_tsk_thread_flag(p, TIF_FORK);
+ frame = container_of(childregs, struct fork_frame, regs);
+ frame->bp = 0;
+ frame->ret_addr = (unsigned long) ret_from_fork;
+ p->thread.sp = (unsigned long) frame;
p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fafe8b9..8feb392 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -928,7 +928,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
per_cpu(cpu_current_top_of_stack, cpu) =
(unsigned long)task_stack_page(idle) + THREAD_SIZE;
#else
- clear_tsk_thread_flag(idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
#endif
}
--
2.5.5
next prev parent reply other threads:[~2016-05-21 16:05 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-05-21 16:04 [PATCH 0/4] x86: Rewrite switch_to() Brian Gerst
2016-05-21 16:04 ` [PATCH 1/4] x86: Save return value from kernel_thread Brian Gerst
2016-05-22 1:44 ` Andy Lutomirski
2016-05-22 2:19 ` Brian Gerst
2016-05-21 16:04 ` [PATCH 2/4] x86-32, kgdb: Don't use thread.ip in sleeping_thread_to_gdb_regs() Brian Gerst
2016-05-23 17:05 ` Andy Lutomirski
2016-05-21 16:04 ` Brian Gerst [this message]
2016-05-22 17:59 ` [PATCH 3/4] x86: Rewrite switch_to() code Andy Lutomirski
2016-05-22 19:31 ` Brian Gerst
2016-05-22 21:07 ` Andy Lutomirski
2016-05-23 2:34 ` Josh Poimboeuf
2016-05-23 4:47 ` Andy Lutomirski
2016-05-23 11:40 ` Josh Poimboeuf
2016-05-23 11:49 ` Brian Gerst
2016-05-23 12:05 ` Josh Poimboeuf
2016-05-23 11:14 ` Brian Gerst
2016-05-23 11:47 ` Josh Poimboeuf
2016-05-23 11:49 ` Josh Poimboeuf
2016-05-23 16:46 ` Josh Poimboeuf
2016-05-23 17:03 ` Andy Lutomirski
2016-05-23 18:44 ` Josh Poimboeuf
2016-07-12 14:16 ` Josh Poimboeuf
2016-06-15 1:31 ` Andy Lutomirski
2016-06-15 8:03 ` Ingo Molnar
2016-06-15 11:52 ` Brian Gerst
2016-05-21 16:04 ` [PATCH 4/4] x86: Pass kernel thread parameters in fork_frame Brian Gerst
2016-05-22 18:01 ` Andy Lutomirski
2016-05-22 19:21 ` Brian Gerst
2016-05-23 15:23 ` Josh Poimboeuf
2016-05-23 15:36 ` Andy Lutomirski
2016-05-23 21:04 ` Brian Gerst
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1463846691-18498-4-git-send-email-brgerst@gmail.com \
--to=brgerst@gmail.com \
--cc=bp@suse.de \
--cc=dvlasenk@redhat.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@amacapital.net \
--cc=mingo@kernel.org \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.