linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Joerg Roedel <joro@8bytes.org>
To: Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@kernel.org>, "H . Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Andy Lutomirski <luto@kernel.org>,
	Dave Hansen <dave.hansen@intel.com>,
	Josh Poimboeuf <jpoimboe@redhat.com>,
	Juergen Gross <jgross@suse.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Borislav Petkov <bp@alien8.de>, Jiri Kosina <jkosina@suse.cz>,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Brian Gerst <brgerst@gmail.com>,
	David Laight <David.Laight@aculab.com>,
	Denys Vlasenko <dvlasenk@redhat.com>,
	Eduardo Valentin <eduval@amazon.com>,
	Greg KH <gregkh@linuxfoundation.org>,
	Will Deacon <will.deacon@arm.com>,
	aliguori@amazon.com, daniel.gruss@iaik.tugraz.at,
	hughd@google.com, keescook@google.com,
	Andrea Arcangeli <aarcange@redhat.com>,
	Waiman Long <llong@redhat.com>,
	jroedel@suse.de, joro@8bytes.org
Subject: [PATCH 02/16] x86/entry/32: Enter the kernel via trampoline stack
Date: Tue, 16 Jan 2018 17:36:45 +0100	[thread overview]
Message-ID: <1516120619-1159-3-git-send-email-joro@8bytes.org> (raw)
In-Reply-To: <1516120619-1159-1-git-send-email-joro@8bytes.org>

From: Joerg Roedel <jroedel@suse.de>

Use the sysenter stack as a trampoline stack to enter the
kernel. The sysenter stack is already in the cpu_entry_area
and will be mapped to userspace when PTI is enabled.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 arch/x86/entry/entry_32.S        | 89 +++++++++++++++++++++++++++++++++++-----
 arch/x86/include/asm/switch_to.h |  6 +--
 arch/x86/kernel/asm-offsets_32.c |  4 +-
 arch/x86/kernel/cpu/common.c     |  5 ++-
 arch/x86/kernel/process.c        |  2 -
 arch/x86/kernel/process_32.c     |  6 +++
 6 files changed, 91 insertions(+), 21 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index eb8c5615777b..5a7bdb73be9f 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -222,6 +222,47 @@
 .endm
 
 /*
+ * Switch from the entry-trampline stack to the kernel stack of the
+ * running task.
+ *
+ * nr_regs is the number of dwords to push from the entry stack to the
+ * task stack. If it is > 0 it expects an irq frame at the bottom of the
+ * stack.
+ *
+ * check_user != 0 it will add a check to only switch stacks if the
+ * kernel entry was from user-space.
+ */
+.macro SWITCH_TO_KERNEL_STACK nr_regs=0 check_user=0
+
+	.if \check_user > 0 && \nr_regs > 0
+	testb $3, (\nr_regs - 4)*4(%esp)		/* CS */
+	jz .Lend_\@
+	.endif
+
+	pushl %edi
+	movl  %esp, %edi
+
+	/*
+	 * TSS_sysenter_stack is the offset from the bottom of the
+	 * entry-stack
+	 */
+	movl  TSS_sysenter_stack + ((\nr_regs + 1) * 4)(%esp), %esp
+
+	/* Copy the registers over */
+	.if \nr_regs > 0
+	i = 0
+	.rept \nr_regs
+	pushl (\nr_regs - i) * 4(%edi)
+	i = i + 1
+	.endr
+	.endif
+
+	mov (%edi), %edi
+
+.Lend_\@:
+.endm
+
+/*
  * %eax: prev task
  * %edx: next task
  */
@@ -401,7 +442,9 @@ ENTRY(xen_sysenter_target)
  * 0(%ebp) arg6
  */
 ENTRY(entry_SYSENTER_32)
-	movl	TSS_sysenter_stack(%esp), %esp
+	/* Kernel stack is empty */
+	SWITCH_TO_KERNEL_STACK
+
 .Lsysenter_past_esp:
 	pushl	$__USER_DS		/* pt_regs->ss */
 	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
@@ -521,6 +564,10 @@ ENDPROC(entry_SYSENTER_32)
 ENTRY(entry_INT80_32)
 	ASM_CLAC
 	pushl	%eax			/* pt_regs->orig_ax */
+
+	/* Stack layout: ss, esp, eflags, cs, eip, orig_eax */
+	SWITCH_TO_KERNEL_STACK nr_regs=6 check_user=1
+
 	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */
 
 	/*
@@ -655,6 +702,10 @@ END(irq_entries_start)
 common_interrupt:
 	ASM_CLAC
 	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
+
+	/* Stack layout: ss, esp, eflags, cs, eip, vector */
+	SWITCH_TO_KERNEL_STACK nr_regs=6 check_user=1
+
 	SAVE_ALL
 	ENCODE_FRAME_POINTER
 	TRACE_IRQS_OFF
@@ -663,16 +714,17 @@ common_interrupt:
 	jmp	ret_from_intr
 ENDPROC(common_interrupt)
 
-#define BUILD_INTERRUPT3(name, nr, fn)	\
-ENTRY(name)				\
-	ASM_CLAC;			\
-	pushl	$~(nr);			\
-	SAVE_ALL;			\
-	ENCODE_FRAME_POINTER;		\
-	TRACE_IRQS_OFF			\
-	movl	%esp, %eax;		\
-	call	fn;			\
-	jmp	ret_from_intr;		\
+#define BUILD_INTERRUPT3(name, nr, fn)			\
+ENTRY(name)						\
+	ASM_CLAC;					\
+	pushl	$~(nr);					\
+	SWITCH_TO_KERNEL_STACK nr_regs=6 check_user=1;	\
+	SAVE_ALL;					\
+	ENCODE_FRAME_POINTER;				\
+	TRACE_IRQS_OFF					\
+	movl	%esp, %eax;				\
+	call	fn;					\
+	jmp	ret_from_intr;				\
 ENDPROC(name)
 
 #define BUILD_INTERRUPT(name, nr)		\
@@ -893,6 +945,9 @@ ENTRY(page_fault)
 END(page_fault)
 
 common_exception:
+	/* Stack layout: ss, esp, eflags, cs, eip, error_code, handler */
+	SWITCH_TO_KERNEL_STACK nr_regs=7 check_user=1
+
 	/* the function address is in %gs's slot on the stack */
 	pushl	%fs
 	pushl	%es
@@ -936,6 +991,10 @@ ENTRY(debug)
 	 */
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
+
+	/* Stack layout: ss, esp, eflags, cs, eip, $-1 */
+	SWITCH_TO_KERNEL_STACK nr_regs=6 check_user=1
+
 	SAVE_ALL
 	ENCODE_FRAME_POINTER
 	xorl	%edx, %edx			# error code 0
@@ -971,6 +1030,10 @@ END(debug)
  */
 ENTRY(nmi)
 	ASM_CLAC
+
+	/* Stack layout: ss, esp, eflags, cs, eip */
+	SWITCH_TO_KERNEL_STACK nr_regs=5 check_user=1
+
 #ifdef CONFIG_X86_ESPFIX32
 	pushl	%eax
 	movl	%ss, %eax
@@ -1034,6 +1097,10 @@ END(nmi)
 ENTRY(int3)
 	ASM_CLAC
 	pushl	$-1				# mark this as an int
+
+	/* Stack layout: ss, esp, eflags, cs, eip, vector */
+	SWITCH_TO_KERNEL_STACK nr_regs=6 check_user=1
+
 	SAVE_ALL
 	ENCODE_FRAME_POINTER
 	TRACE_IRQS_OFF
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index eb5f7999a893..20e5f7ab8260 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -89,13 +89,9 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
 /* This is used when switching tasks or entering/exiting vm86 mode. */
 static inline void update_sp0(struct task_struct *task)
 {
-	/* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
-#ifdef CONFIG_X86_32
-	load_sp0(task->thread.sp0);
-#else
+	/* sp0 always points to the entry trampoline stack, which is constant: */
 	if (static_cpu_has(X86_FEATURE_XENPV))
 		load_sp0(task_top_of_stack(task));
-#endif
 }
 
 #endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 654229bac2fc..7270dd834f4b 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -47,9 +47,11 @@ void foo(void)
 	BLANK();
 
 	/* Offset from the sysenter stack to tss.sp0 */
-	DEFINE(TSS_sysenter_stack, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
+	DEFINE(TSS_sysenter_stack, offsetof(struct cpu_entry_area, tss.x86_tss.sp1) -
 	       offsetofend(struct cpu_entry_area, entry_stack_page.stack));
 
+	OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
+
 #ifdef CONFIG_CC_STACKPROTECTOR
 	BLANK();
 	OFFSET(stack_canary_offset, stack_canary, canary);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ef29ad001991..20a71c914e59 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1649,11 +1649,12 @@ void cpu_init(void)
 	enter_lazy_tlb(&init_mm, curr);
 
 	/*
-	 * Initialize the TSS.  Don't bother initializing sp0, as the initial
-	 * task never enters user mode.
+	 * Initialize the TSS.  sp0 points to the entry trampoline stack
+	 * regardless of what task is running.
 	 */
 	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
 	load_TR_desc();
+	load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
 
 	load_mm_ldt(&init_mm);
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 832a6acd730f..a9950946b263 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -57,14 +57,12 @@ __visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
 		 */
 		.sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
 
-#ifdef CONFIG_X86_64
 		/*
 		 * .sp1 is cpu_current_top_of_stack.  The init task never
 		 * runs user code, but cpu_current_top_of_stack should still
 		 * be well defined before the first context switch.
 		 */
 		.sp1 = TOP_OF_INIT_STACK,
-#endif
 
 #ifdef CONFIG_X86_32
 		.ss0 = __KERNEL_DS,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 5224c6099184..452eeac00b80 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -292,6 +292,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	this_cpu_write(cpu_current_top_of_stack,
 		       (unsigned long)task_stack_page(next_p) +
 		       THREAD_SIZE);
+	/*
+	 * TODO: Find a way to let cpu_current_top_of_stack point to
+	 * cpu_tss_rw.x86_tss.sp1. Doing so now results in stack corruption with
+	 * iret exceptions.
+	 */
+	this_cpu_write(cpu_tss_rw.x86_tss.sp1, next_p->thread.sp0);
 
 	/*
 	 * Restore %gs if needed (which is common)
-- 
2.13.6

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2018-01-19 15:29 UTC|newest]

Thread overview: 93+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-16 16:36 [RFC PATCH 00/16] PTI support for x86-32 Joerg Roedel
2018-01-16 16:36 ` [PATCH 01/16] x86/entry/32: Rename TSS_sysenter_sp0 to TSS_sysenter_stack Joerg Roedel
2018-01-16 18:35   ` Thomas Gleixner
2018-01-16 16:36 ` Joerg Roedel [this message]
2018-01-16 20:30   ` [PATCH 02/16] x86/entry/32: Enter the kernel via trampoline stack Thomas Gleixner
2018-01-16 22:37     ` Andy Lutomirski
2018-01-16 22:45   ` Andy Lutomirski
2018-01-17  9:18     ` Joerg Roedel
2018-01-17 18:10       ` Andy Lutomirski
2018-01-19  9:55         ` Joerg Roedel
2018-01-19 16:30           ` Andy Lutomirski
2018-01-22 10:11             ` Joerg Roedel
2018-01-22 17:46               ` Andy Lutomirski
2018-01-17  2:47   ` Boris Ostrovsky
2018-01-17  9:02     ` Joerg Roedel
2018-01-17 14:04       ` Andrew Cooper
2018-01-17 15:22         ` Boris Ostrovsky
2018-01-16 16:36 ` [PATCH 03/16] x86/entry/32: Leave the kernel via the " Joerg Roedel
2018-01-16 22:48   ` Andy Lutomirski
2018-01-17  9:24     ` Joerg Roedel
2018-01-17 13:57       ` Brian Gerst
2018-01-17 14:00         ` Brian Gerst
2018-01-17 14:14           ` Joerg Roedel
2018-01-17 14:45             ` Josh Poimboeuf
2018-01-17 14:10         ` Joerg Roedel
2018-01-17 18:12           ` Andy Lutomirski
2018-01-19  9:57             ` Joerg Roedel
2018-01-16 16:36 ` [PATCH 04/16] x86/pti: Define X86_CR3_PTI_PCID_USER_BIT on x86_32 Joerg Roedel
2018-01-16 22:46   ` Andy Lutomirski
2018-01-17  9:26     ` Joerg Roedel
2018-01-16 16:36 ` [PATCH 05/16] x86/pgtable: Move pgdp kernel/user conversion functions to pgtable.h Joerg Roedel
2018-01-16 16:36 ` [PATCH 06/16] x86/mm/ldt: Reserve high address-space range for the LDT Joerg Roedel
2018-01-16 16:52   ` Peter Zijlstra
2018-01-16 17:13     ` Joerg Roedel
2018-01-16 17:31       ` Peter Zijlstra
2018-01-16 17:34         ` Waiman Long
2018-01-16 22:51     ` Andy Lutomirski
2018-01-17  7:59       ` Peter Zijlstra
2018-01-16 16:36 ` [PATCH 07/16] x86/mm: Move two more functions from pgtable_64.h to pgtable.h Joerg Roedel
2018-01-16 18:03   ` Dave Hansen
2018-01-16 19:11     ` Joerg Roedel
2018-01-16 19:34       ` Thomas Gleixner
2018-01-16 16:36 ` [PATCH 08/16] x86/pgtable/32: Allocate 8k page-tables when PTI is enabled Joerg Roedel
2018-01-17 23:43   ` Andy Lutomirski
2018-01-19  9:57     ` Joerg Roedel
2018-01-16 16:36 ` [PATCH 09/16] x86/mm/pti: Clone CPU_ENTRY_AREA on PMD level on x86_32 Joerg Roedel
2018-01-16 21:03   ` Thomas Gleixner
2018-01-16 16:36 ` [PATCH 10/16] x86/mm/pti: Populate valid user pud entries Joerg Roedel
2018-01-16 18:06   ` Dave Hansen
2018-01-16 19:41     ` Joerg Roedel
2018-01-16 21:06   ` Thomas Gleixner
2018-01-16 16:36 ` [PATCH 11/16] x86/mm/pgtable: Move pti_set_user_pgd() to pgtable.h Joerg Roedel
2018-01-16 16:36 ` [PATCH 12/16] x86/mm/pae: Populate the user page-table with user pgd's Joerg Roedel
2018-01-16 18:11   ` Dave Hansen
2018-01-16 19:44     ` Joerg Roedel
2018-01-16 21:10   ` Thomas Gleixner
2018-01-16 21:15     ` Dave Hansen
2018-01-16 16:36 ` [PATCH 13/16] x86/mm/pti: Add an overflow check to pti_clone_pmds() Joerg Roedel
2018-01-16 16:36 ` [PATCH 14/16] x86/mm/legacy: Populate the user page-table with user pgd's Joerg Roedel
2018-01-17 23:41   ` Andy Lutomirski
2018-01-16 16:36 ` [PATCH 15/16] x86/entry/32: Switch between kernel and user cr3 on entry/exit Joerg Roedel
2018-01-16 16:36 ` [PATCH 16/16] x86/pti: Allow CONFIG_PAGE_TABLE_ISOLATION for x86_32 Joerg Roedel
2018-01-16 18:14 ` [RFC PATCH 00/16] PTI support for x86-32 Dave Hansen
2018-01-16 19:46   ` Joerg Roedel
2018-01-16 18:59 ` Linus Torvalds
2018-01-16 19:02   ` Dave Hansen
2018-01-16 19:21   ` Andrew Cooper
2018-01-16 19:55   ` Joerg Roedel
2018-01-16 21:20 ` Thomas Gleixner
2018-01-17  9:55   ` Joerg Roedel
2018-01-16 22:26 ` Andy Lutomirski
2018-01-17  9:33   ` Joerg Roedel
2018-01-19 10:55 ` Pavel Machek
2018-01-19 11:07   ` Joerg Roedel
2018-01-19 12:58     ` Pavel Machek
2018-01-21 20:13 ` Nadav Amit
2018-01-21 20:44   ` Nadav Amit
2018-01-21 23:46     ` Nadav Amit
2018-01-22  2:11       ` Linus Torvalds
2018-01-22  2:20         ` hpa
2018-01-22 20:14           ` Linus Torvalds
2018-01-22 21:10             ` H. Peter Anvin
2018-01-23 14:38               ` Alan Cox
2018-01-22  2:27         ` Nadav Amit
2018-01-22  8:56       ` Joerg Roedel
2018-01-23 14:57         ` Alan Cox
2018-01-25 17:09         ` Alan Cox
2018-01-26 12:36           ` Joerg Roedel
2018-01-22  9:55       ` David Laight
2018-01-22 10:04         ` Joerg Roedel
2018-01-24 18:58 ` Krzysztof Mazur
2018-01-25 22:09   ` Nadav Amit
2018-01-26  9:28     ` Krzysztof Mazur

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1516120619-1159-3-git-send-email-joro@8bytes.org \
    --to=joro@8bytes.org \
    --cc=David.Laight@aculab.com \
    --cc=aarcange@redhat.com \
    --cc=aliguori@amazon.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=bp@alien8.de \
    --cc=brgerst@gmail.com \
    --cc=daniel.gruss@iaik.tugraz.at \
    --cc=dave.hansen@intel.com \
    --cc=dvlasenk@redhat.com \
    --cc=eduval@amazon.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=hpa@zytor.com \
    --cc=hughd@google.com \
    --cc=jgross@suse.com \
    --cc=jkosina@suse.cz \
    --cc=jpoimboe@redhat.com \
    --cc=jroedel@suse.de \
    --cc=keescook@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=llong@redhat.com \
    --cc=luto@kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=will.deacon@arm.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).