All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stas Sergeev <stsp@aknet.ru>
To: Petr Vandrovec <vandrove@vc.cvut.cz>
Cc: linux-kernel@vger.kernel.org
Subject: Re: ESP corruption bug - what CPUs are affected?
Date: Wed, 22 Sep 2004 22:49:45 +0400	[thread overview]
Message-ID: <4151C949.1080807@aknet.ru> (raw)
In-Reply-To: <20040918203529.GA4447@vana.vc.cvut.cz>

[-- Attachment #1: Type: text/plain, Size: 868 bytes --]

Hi Petr et al.

I coded up something that remotely looks like the
discussed patch.
I have deviated from your proposals at some important
points:
1. I am not allocating the ring1 stack separately,
I am allocating it on a ring0 stack. Much simpler
code, although non-reentrant/preempt-unsafe.
2. I am disabling the interrupts after all. That's
because of the preempt-unsafeness. I pass up the
IOPL=1 when necessary, to avoid problems.
But I guess also with your technique the interrupts
had to be disabled, unless the ring1 stack is
per-thread.
3. I am using LAR. Do you really think it can be
slower than the whole thing of locating LDT?

Let me know if I did something stupid.
The patch is attached.
I tested (pretty much) everything in it, except
probably the "popl %esp" restartability. But that
one looks fairly simple and should work.

Does this patch look good?


[-- Attachment #2: linux-2.6.8-stacks2.diff --]
[-- Type: text/x-patch, Size: 9860 bytes --]

diff -urN linux-2.6.8-pcsp/arch/i386/kernel/entry.S linux-2.6.8-stacks/arch/i386/kernel/entry.S
--- linux-2.6.8-pcsp/arch/i386/kernel/entry.S	2004-06-10 13:28:35.000000000 +0400
+++ linux-2.6.8-stacks/arch/i386/kernel/entry.S	2004-09-22 15:39:50.000000000 +0400
@@ -70,9 +70,16 @@
 CF_MASK		= 0x00000001
 TF_MASK		= 0x00000100
 IF_MASK		= 0x00000200
-DF_MASK		= 0x00000400 
+DF_MASK		= 0x00000400
+IOPL1_MASK 	= 0x00001000
+IOPL2_MASK 	= 0x00002000
+IOPL_MASK 	= 0x00003000
 NT_MASK		= 0x00004000
+RF_MASK		= 0x00010000
 VM_MASK		= 0x00020000
+AC_MASK 	= 0x00040000
+VIF_MASK 	= 0x00080000
+VIP_MASK 	= 0x00100000
 
 #ifdef CONFIG_PREEMPT
 #define preempt_stop		cli
@@ -81,6 +88,49 @@
 #define resume_kernel		restore_all
 #endif
 
+/*
+ * First, reserve the 0x400 bytes on stack for the NMI
+ * handler - it uses the same stack.
+ * Second, push the additional iret frame to switch to
+ * the Ring-1 trampoline. The correct value of ESP
+ * higher word is pushed here and popped on Ring-1.
+ * espfix_trampoline is working on a "small" stack.
+ * We patch its base in GDT here to allow the Ring-1 code
+ * to access the right place on stack. This is preempt-unsafe
+ * and therefore must be done with the interrupts disabled.
+ * To allow Ring-1 code to re-enable interrupts, we make
+ * sure it will be executing with at least IOPL1.
+ */
+#define NMI_STACK_RESERVE 0x400
+#define MAKE_ESPFIX \
+	cli; \
+	subl $NMI_STACK_RESERVE, %esp; \
+	.rept 5; \
+	pushl NMI_STACK_RESERVE+0x10(%esp); \
+	.endr; \
+	pushl %eax; \
+	movl %esp, %eax; \
+	pushl %ebp; \
+	GET_THREAD_INFO(%ebp); \
+	movl TI_cpu(%ebp), %ebp; \
+	shll $GDT_SIZE_SHIFT, %ebp; \
+	addl $cpu_gdt_table, %ebp; \
+	movw %ax, ((GDT_ENTRY_ESPFIX_SS << GDT_ENTRY_SHIFT) + 2)(%ebp); \
+	shrl $16, %eax; \
+	movb %al, ((GDT_ENTRY_ESPFIX_SS << GDT_ENTRY_SHIFT) + 4)(%ebp); \
+	movb %ah, ((GDT_ENTRY_ESPFIX_SS << GDT_ENTRY_SHIFT) + 7)(%ebp); \
+	popl %ebp; \
+	popl %eax; \
+	pushw 14(%esp); \
+	pushw $4; \
+	pushl $__ESPFIX_SS; \
+	pushl $0; \
+	pushl 20(%esp); \
+	andl $~(IF_MASK | TF_MASK | RF_MASK | NT_MASK | AC_MASK), (%esp); \
+	orl $IOPL1_MASK, (%esp); \
+	pushl $__ESPFIX_CS; \
+	pushl $espfix_trampoline;
+
 #define SAVE_ALL \
 	cld; \
 	pushl %es; \
@@ -122,9 +172,24 @@
 .previous
 
 
+/* If returning to Ring-3, not to V86, and with
+ * the small stack, try to fix the higher word of
+ * ESP, as the CPU won't restore it from stack.
+ * This is an "official" bug of all the x86-compatible
+ * CPUs, which we can try to work around to make
+ * dosemu happy. */
 #define RESTORE_ALL	\
-	RESTORE_REGS	\
-	addl $4, %esp;	\
+	movl EFLAGS(%esp), %eax; \
+	movb CS(%esp), %al; \
+	andl $(VM_MASK | 2), %eax; \
+	cmpl $2, %eax; \
+	jne 3f; \
+	larl OLDSS(%esp), %eax; \
+	testl $0x00400000, %eax; \
+3:	RESTORE_REGS	\
+	leal 4(%esp), %esp; \
+	jnz 1f; \
+	MAKE_ESPFIX \
 1:	iret;		\
 .section .fixup,"ax";   \
 2:	sti;		\
@@ -174,6 +239,23 @@
 	jmp do_lcall
 
 
+ENTRY(espfix_trampoline)
+	popl %esp
+espfix_past_esp:
+1:	iret
+.section .fixup,"ax"
+2:	sti
+	movl $(__USER_DS), %edx
+	movl %edx, %ds
+	movl %edx, %es
+	pushl $11
+	call do_exit
+.previous
+.section __ex_table,"a"
+	.align 4
+	.long 1b,2b
+.previous
+
 ENTRY(ret_from_fork)
 	pushl %eax
 	call schedule_tail
@@ -196,7 +278,7 @@
 	GET_THREAD_INFO(%ebp)
 	movl EFLAGS(%esp), %eax		# mix EFLAGS and CS
 	movb CS(%esp), %al
-	testl $(VM_MASK | 3), %eax
+	testl $(VM_MASK | 2), %eax
 	jz resume_kernel		# returning to kernel or vm86-space
 ENTRY(resume_userspace)
  	cli				# make sure we don't miss an interrupt
@@ -504,7 +586,12 @@
 ENTRY(nmi)
 	cmpl $sysenter_entry,(%esp)
 	je nmi_stack_fixup
-	pushl %eax
+	cmpl $espfix_past_esp,(%esp)
+	jne 1f
+	/* restart the "popl %esp" */
+	decl (%esp)
+	subw $4, 12(%esp)
+1:	pushl %eax
 	movl %esp,%eax
 	/* Do not access memory above the end of our stack page,
 	 * it might not exist.
diff -urN linux-2.6.8-pcsp/arch/i386/kernel/head.S linux-2.6.8-stacks/arch/i386/kernel/head.S
--- linux-2.6.8-pcsp/arch/i386/kernel/head.S	2004-08-10 11:02:36.000000000 +0400
+++ linux-2.6.8-stacks/arch/i386/kernel/head.S	2004-09-21 14:58:16.000000000 +0400
@@ -514,8 +514,8 @@
 	.quad 0x00009a0000000000	/* 0xc0 APM CS 16 code (16 bit) */
 	.quad 0x0040920000000000	/* 0xc8 APM DS    data */
 
-	.quad 0x0000000000000000	/* 0xd0 - unused */
-	.quad 0x0000000000000000	/* 0xd8 - unused */
+	.quad 0x00cfba000000ffff	/* 0xd0 - ESPfix CS */
+	.quad 0x0000b2000000ffff	/* 0xd8 - ESPfix SS */
 	.quad 0x0000000000000000	/* 0xe0 - unused */
 	.quad 0x0000000000000000	/* 0xe8 - unused */
 	.quad 0x0000000000000000	/* 0xf0 - unused */
diff -urN linux-2.6.8-pcsp/arch/i386/kernel/process.c linux-2.6.8-stacks/arch/i386/kernel/process.c
--- linux-2.6.8-pcsp/arch/i386/kernel/process.c	2004-06-21 09:19:07.000000000 +0400
+++ linux-2.6.8-stacks/arch/i386/kernel/process.c	2004-09-21 10:45:55.000000000 +0400
@@ -225,7 +225,7 @@
 	printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
 	print_symbol("EIP is at %s\n", regs->eip);
 
-	if (regs->xcs & 3)
+	if (regs->xcs & 2)
 		printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
 	printk(" EFLAGS: %08lx    %s  (%s)\n",regs->eflags, print_tainted(),UTS_RELEASE);
 	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
diff -urN linux-2.6.8-pcsp/arch/i386/kernel/signal.c linux-2.6.8-stacks/arch/i386/kernel/signal.c
--- linux-2.6.8-pcsp/arch/i386/kernel/signal.c	2004-08-10 11:02:36.000000000 +0400
+++ linux-2.6.8-stacks/arch/i386/kernel/signal.c	2004-09-21 10:48:38.000000000 +0400
@@ -558,7 +558,7 @@
 	 * kernel mode. Just return without doing anything
 	 * if so.
 	 */
-	if ((regs->xcs & 3) != 3)
+	if (!(regs->xcs & 2))
 		return 1;
 
 	if (current->flags & PF_FREEZE) {
diff -urN linux-2.6.8-pcsp/arch/i386/kernel/traps.c linux-2.6.8-stacks/arch/i386/kernel/traps.c
--- linux-2.6.8-pcsp/arch/i386/kernel/traps.c	2004-08-10 11:02:36.000000000 +0400
+++ linux-2.6.8-stacks/arch/i386/kernel/traps.c	2004-09-21 10:47:37.000000000 +0400
@@ -210,7 +210,7 @@
 
 	esp = (unsigned long) (&regs->esp);
 	ss = __KERNEL_DS;
-	if (regs->xcs & 3) {
+	if (regs->xcs & 2) {
 		in_kernel = 0;
 		esp = regs->esp;
 		ss = regs->xss & 0xffff;
@@ -264,7 +264,7 @@
 	char c;
 	unsigned long eip;
 
-	if (regs->xcs & 3)
+	if (regs->xcs & 2)
 		goto no_bug;		/* Not in kernel */
 
 	eip = regs->eip;
@@ -335,7 +335,7 @@
 
 static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
 {
-	if (!(regs->eflags & VM_MASK) && !(3 & regs->xcs))
+	if (!(regs->eflags & VM_MASK) && !(2 & regs->xcs))
 		die(str, regs, err);
 }
 
@@ -357,7 +357,7 @@
 		goto trap_signal;
 	}
 
-	if (!(regs->xcs & 3))
+	if (!(regs->xcs & 2))
 		goto kernel_trap;
 
 	trap_signal: {
@@ -437,7 +437,7 @@
 	if (regs->eflags & VM_MASK)
 		goto gp_in_vm86;
 
-	if (!(regs->xcs & 3))
+	if (!(regs->xcs & 2))
 		goto gp_in_kernel;
 
 	current->thread.error_code = error_code;
@@ -614,7 +614,7 @@
 		 * allowing programs to debug themselves without the ptrace()
 		 * interface.
 		 */
-		if ((regs->xcs & 3) == 0)
+		if ((regs->xcs & 2) == 0)
 			goto clear_TF_reenable;
 		if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE)
 			goto clear_TF;
@@ -630,7 +630,7 @@
 	/* If this is a kernel mode trap, save the user PC on entry to 
 	 * the kernel, that's what the debugger can make sense of.
 	 */
-	info.si_addr = ((regs->xcs & 3) == 0) ? (void *)tsk->thread.eip : 
+	info.si_addr = ((regs->xcs & 2) == 0) ? (void *)tsk->thread.eip : 
 	                                        (void *)regs->eip;
 	force_sig_info(SIGTRAP, &info, tsk);
 
diff -urN linux-2.6.8-pcsp/arch/i386/mm/extable.c linux-2.6.8-stacks/arch/i386/mm/extable.c
--- linux-2.6.8-pcsp/arch/i386/mm/extable.c	2004-06-09 15:44:19.000000000 +0400
+++ linux-2.6.8-stacks/arch/i386/mm/extable.c	2004-09-21 17:17:05.000000000 +0400
@@ -26,6 +26,11 @@
 	}
 #endif
 
+	if (unlikely(regs->xcs == __ESPFIX_CS))
+	{
+		regs->xcs = __KERNEL_CS;
+	}
+
 	fixup = search_exception_tables(regs->eip);
 	if (fixup) {
 		regs->eip = fixup->fixup;
diff -urN linux-2.6.8-pcsp/arch/i386/mm/fault.c linux-2.6.8-stacks/arch/i386/mm/fault.c
--- linux-2.6.8-pcsp/arch/i386/mm/fault.c	2004-08-10 11:02:37.000000000 +0400
+++ linux-2.6.8-stacks/arch/i386/mm/fault.c	2004-09-21 16:03:28.000000000 +0400
@@ -77,7 +77,7 @@
 	u32 seg_ar, seg_limit, base, *desc;
 
 	/* The standard kernel/user address space limit. */
-	*eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg;
+	*eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg;
 
 	/* Unlikely, but must come before segment checks. */
 	if (unlikely((regs->eflags & VM_MASK) != 0))
diff -urN linux-2.6.8-pcsp/include/asm-i386/segment.h linux-2.6.8-stacks/include/asm-i386/segment.h
--- linux-2.6.8-pcsp/include/asm-i386/segment.h	2004-01-09 09:59:19.000000000 +0300
+++ linux-2.6.8-stacks/include/asm-i386/segment.h	2004-09-21 14:57:44.000000000 +0400
@@ -38,8 +38,8 @@
  *  24 - APM BIOS support
  *  25 - APM BIOS support 
  *
- *  26 - unused
- *  27 - unused
+ *  26 - ESPfix Ring-1 trampoline CS
+ *  27 - ESPfix Ring-1 small SS
  *  28 - unused
  *  29 - unused
  *  30 - unused
@@ -71,14 +71,21 @@
 #define GDT_ENTRY_PNPBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 6)
 #define GDT_ENTRY_APMBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 11)
 
+#define GDT_ENTRY_ESPFIX_CS		(GDT_ENTRY_KERNEL_BASE + 14)
+#define GDT_ENTRY_ESPFIX_SS		(GDT_ENTRY_KERNEL_BASE + 15)
+#define __ESPFIX_CS (GDT_ENTRY_ESPFIX_CS * 8 + 1)
+#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8 + 1)
+
 #define GDT_ENTRY_DOUBLEFAULT_TSS	31
 
 /*
  * The GDT has 32 entries
  */
-#define GDT_ENTRIES 32
-
-#define GDT_SIZE (GDT_ENTRIES * 8)
+#define GDT_ENTRIES_SHIFT 5
+#define GDT_ENTRIES (1 << GDT_ENTRIES_SHIFT)
+#define GDT_ENTRY_SHIFT 3
+#define GDT_SIZE_SHIFT (GDT_ENTRIES_SHIFT + GDT_ENTRY_SHIFT)
+#define GDT_SIZE (1 << GDT_SIZE_SHIFT)
 
 /* Simple and small GDT entries for booting only */
 

  reply	other threads:[~2004-09-22 18:41 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-09-16 18:39 ESP corruption bug - what CPUs are affected? Petr Vandrovec
2004-09-17 18:12 ` Stas Sergeev
2004-09-18 16:45 ` Stas Sergeev
2004-09-18 16:59   ` Petr Vandrovec
2004-09-18 19:14     ` Stas Sergeev
2004-09-18 20:35       ` Petr Vandrovec
2004-09-22 18:49         ` Stas Sergeev [this message]
2004-09-22 19:19           ` Richard B. Johnson
2004-09-22 20:03             ` Stas Sergeev
2004-09-22 20:13               ` Richard B. Johnson
2004-09-28 15:43                 ` Denis Vlasenko
2004-09-22 20:02           ` Petr Vandrovec
2004-09-23  4:09             ` Stas Sergeev
2004-09-23 17:08             ` Stas Sergeev
2004-09-23 18:06               ` Petr Vandrovec
2004-09-24 20:36                 ` Stas Sergeev
2004-09-24 21:43                   ` Petr Vandrovec
2004-09-25  8:04                     ` Gabriel Paubert
2004-09-25 12:25                       ` Stas Sergeev
2004-09-25 19:18                         ` Gabriel Paubert
2004-09-25 20:40                           ` Stas Sergeev
2004-09-25 23:42                             ` Gabriel Paubert
2004-09-26 18:04                               ` Stas Sergeev
2004-09-27  9:07                                 ` Gabriel Paubert
2004-09-30 15:11                               ` Bill Davidsen
2004-10-06 16:18                     ` ESP corruption bug - what CPUs are affected? (patch attempts) Stas Sergeev
  -- strict thread matches above, loose matches on Subject: below --
2004-10-06 17:18 ESP corruption bug - what CPUs are affected? (patch att Petr Vandrovec
2004-10-11 18:32 ` ESP corruption bug - what CPUs are affected? Stas Sergeev
2004-09-16 17:49 Stas Sergeev
2004-09-16 19:03 ` Denis Vlasenko
2004-09-17 18:13   ` Stas Sergeev
2004-09-17 22:04     ` Denis Vlasenko
2004-09-18 10:58       ` Stas Sergeev
2004-09-18 13:08         ` Denis Vlasenko
2004-09-18 17:05           ` Stas Sergeev
     [not found]             ` <200409190108.45641.vda@port.imtp.ilyichevsk.odessa.ua>
2004-09-22 19:05               ` Stas Sergeev
2004-09-21 11:19         ` Pavel Machek
2004-09-21 11:43           ` Denis Vlasenko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4151C949.1080807@aknet.ru \
    --to=stsp@aknet.ru \
    --cc=linux-kernel@vger.kernel.org \
    --cc=vandrove@vc.cvut.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.