All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stas Sergeev <stsp@aknet.ru>
To: Linus Torvalds <torvalds@osdl.org>
Cc: Andrew Morton <akpm@osdl.org>,
	Linux kernel <linux-kernel@vger.kernel.org>,
	Petr Vandrovec <VANDROVE@vc.cvut.cz>
Subject: Re: [patch] x86: fix ESP corruption CPU bug
Date: Tue, 04 Jan 2005 04:58:34 +0300	[thread overview]
Message-ID: <41D9F84A.6090402@aknet.ru> (raw)
In-Reply-To: <Pine.LNX.4.58.0501031557250.2294@ppc970.osdl.org>

[-- Attachment #1: Type: text/plain, Size: 1075 bytes --]

Hi Linus.

Linus Torvalds wrote:
> Please don't do it like this - you made the patch now depend on the 
> ugliest code in the universe, namely that horribly crappy kgdb-ga sh*t 
I didn't do that. I just re-targeted the
patch to -mm tree and it clashed with the
kgdb-ga patch. And somehow it happened
that a few lines I would have to insert
myself otherwise anyway, appeared to be
already there for me to re-use. I wouldn't
call that a dependancy. Only 3 lines are
re-used in fact.

> The 16-bit stack code may not be the prettiest either, but it doesn't hold 
> a candle to the asm-crap that is entry.S after kgdb-ga.
>From what I can see, kgdb-ga have only
3 small hunks in entry.S, so the crap is
probably very dense there.
Clashing into one of these hunks looks
unavoidable for my needs.

> "resume_kernelX"? What crud.
Does "restore_nocheck" sound better?
Yes, maybe, but then I don't see the way
to provide my patch for -mm. So the attached
one is for plain 2.6.10. I don't know how
Andrew can apply it, so maybe you will?


Signed-off-by: Stas Sergeev <stsp@aknet.ru>


[-- Attachment #2: linux-2.6.10-stk5.diff --]
[-- Type: text/x-patch, Size: 11056 bytes --]

diff -urN linux-2.6.10/arch/i386/kernel/cpu/common.c linux-2.6.10-stk/arch/i386/kernel/cpu/common.c
--- linux-2.6.10/arch/i386/kernel/cpu/common.c	2004-10-21 21:35:21.000000000 +0400
+++ linux-2.6.10-stk/arch/i386/kernel/cpu/common.c	2004-12-31 11:16:28.000000000 +0300
@@ -16,6 +16,10 @@
 DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
 EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
 
+unsigned char cpu_16bit_stack[CPU_16BIT_STACK_SIZE];
+DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
+EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+
 static int cachesize_override __initdata = -1;
 static int disable_x86_fxsr __initdata = 0;
 static int disable_x86_serial_nr __initdata = 1;
@@ -508,6 +512,7 @@
 	int cpu = smp_processor_id();
 	struct tss_struct * t = &per_cpu(init_tss, cpu);
 	struct thread_struct *thread = &current->thread;
+	__u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
 
 	if (test_and_set_bit(cpu, &cpu_initialized)) {
 		printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
@@ -530,6 +535,13 @@
 	 */
 	memcpy(&per_cpu(cpu_gdt_table, cpu), cpu_gdt_table,
 	       GDT_SIZE);
+
+	/* Set up GDT entry for 16bit stack */
+	*(__u64 *)&(per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_ESPFIX_SS]) |=
+		((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
+		((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
+		(CPU_16BIT_STACK_SIZE - 1);
+
 	cpu_gdt_descr[cpu].size = GDT_SIZE - 1;
 	cpu_gdt_descr[cpu].address =
 	    (unsigned long)&per_cpu(cpu_gdt_table, cpu);
diff -urN linux-2.6.10/arch/i386/kernel/entry.S linux-2.6.10-stk/arch/i386/kernel/entry.S
--- linux-2.6.10/arch/i386/kernel/entry.S	2004-12-26 00:37:55.000000000 +0300
+++ linux-2.6.10-stk/arch/i386/kernel/entry.S	2004-12-31 11:30:18.000000000 +0300
@@ -47,6 +47,7 @@
 #include <asm/segment.h>
 #include <asm/smp.h>
 #include <asm/page.h>
+#include <asm/desc.h>
 #include "irq_vectors.h"
 
 #define nr_syscalls ((syscall_table_size)/4)
@@ -78,7 +79,7 @@
 #define preempt_stop		cli
 #else
 #define preempt_stop
-#define resume_kernel		restore_all
+#define resume_kernel		restore_nocheck
 #endif
 
 #define SAVE_ALL \
@@ -122,24 +123,6 @@
 .previous
 
 
-#define RESTORE_ALL	\
-	RESTORE_REGS	\
-	addl $4, %esp;	\
-1:	iret;		\
-.section .fixup,"ax";   \
-2:	sti;		\
-	movl $(__USER_DS), %edx; \
-	movl %edx, %ds; \
-	movl %edx, %es; \
-	movl $11,%eax;	\
-	call do_exit;	\
-.previous;		\
-.section __ex_table,"a";\
-	.align 4;	\
-	.long 1b,2b;	\
-.previous
-
-
 ENTRY(ret_from_fork)
 	pushl %eax
 	call schedule_tail
@@ -163,7 +146,7 @@
 	movl EFLAGS(%esp), %eax		# mix EFLAGS and CS
 	movb CS(%esp), %al
 	testl $(VM_MASK | 3), %eax
-	jz resume_kernel		# returning to kernel or vm86-space
+	jz resume_kernel
 ENTRY(resume_userspace)
  	cli				# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
@@ -177,7 +160,7 @@
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
 	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
-	jnz restore_all
+	jnz restore_nocheck
 need_resched:
 	movl TI_flags(%ebp), %ecx	# need_resched set ?
 	testb $_TIF_NEED_RESCHED, %cl
@@ -192,6 +175,31 @@
 	jmp need_resched
 #endif
 
+ldt_ss:
+	larl OLDSS(%esp), %eax
+	jnz restore_nocheck
+	testl $0x00400000, %eax		# returning to 32bit stack?
+	jnz restore_nocheck		# allright, normal return
+	/* If returning to userspace with 16bit stack,
+	 * try to fix the higher word of ESP, as the CPU
+	 * won't restore it.
+	 * This is an "official" bug of all the x86-compatible
+	 * CPUs, which we can try to work around to make
+	 * dosemu and wine happy. */
+	subl $8, %esp		# reserve space for switch16 pointer
+	cli
+	movl %esp, %eax
+	/* Set up the 16bit stack frame with switch32 pointer on top,
+	 * and a switch16 pointer on top of the current frame. */
+	call setup_x86_bogus_stack
+	RESTORE_REGS
+	lss 20+4(%esp), %esp	# The magic pointer above iret frame
+1:	iret
+.section __ex_table,"a"
+	.align 4
+	.long 1b,iret_exc
+.previous
+
 /* SYSENTER_RETURN points to after the "sysenter" instruction in
    the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
 
@@ -260,8 +268,32 @@
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx	# current->work
 	jne syscall_exit_work
+
 restore_all:
-	RESTORE_ALL
+	movl EFLAGS(%esp), %eax		# mix EFLAGS and CS
+	movb CS(%esp), %al
+	andl $(VM_MASK | 3), %eax
+	cmpl $3, %eax
+	jne restore_nocheck		# returning to kernel or vm86-space
+	testl $4, OLDSS(%esp)
+	jnz ldt_ss
+restore_nocheck:
+	RESTORE_REGS
+	addl $4, %esp
+1:	iret
+.section .fixup,"ax"
+iret_exc:
+	sti
+	movl $(__USER_DS), %edx
+	movl %edx, %ds
+	movl %edx, %es
+	movl $11,%eax
+	call do_exit
+.previous
+.section __ex_table,"a"
+	.align 4
+	.long 1b,iret_exc
+.previous
 
 	# perform work that needs to be done immediately before resumption
 	ALIGN
@@ -337,6 +369,25 @@
 	movl $-ENOSYS,EAX(%esp)
 	jmp resume_userspace
 
+/* Check if we are on 16bit stack. Can happen either on iret of ESPFIX,
+ * or in an exception handler after that iret... */
+#define FIXUP_ESPFIX_STACK \
+	movl %esp, %eax; \
+	/* Magic pointer is at the top of the 16bit stack */ \
+	lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
+	call fixup_x86_bogus_stack; \
+	movl %eax, %esp;
+#define UNWIND_ESPFIX_STACK \
+	pushl %eax; \
+	movl %ss, %eax; \
+	cmpw $__ESPFIX_SS, %ax; \
+	jne 28f; \
+	movl $(__KERNEL_DS), %edx; \
+	movl %edx, %ds; \
+	movl %edx, %es; \
+	FIXUP_ESPFIX_STACK \
+28:	popl %eax;
+
 /*
  * Build the entry stubs and pointer table with
  * some assembler magic.
@@ -391,7 +442,9 @@
 	pushl %ecx
 	pushl %ebx
 	cld
-	movl %es, %ecx
+	pushl %es
+	UNWIND_ESPFIX_STACK
+	popl %ecx
 	movl ES(%esp), %edi		# get the function address
 	movl ORIG_EAX(%esp), %edx	# get the error code
 	movl %eax, ORIG_EAX(%esp)
@@ -473,6 +526,11 @@
  * fault happened on the sysenter path.
  */
 ENTRY(nmi)
+	pushl %eax
+	movl %ss, %eax
+	cmpw $__ESPFIX_SS, %ax
+	popl %eax
+	je nmi_16bit_stack
 	cmpl $sysenter_entry,(%esp)
 	je nmi_stack_fixup
 	pushl %eax
@@ -492,7 +550,7 @@
 	xorl %edx,%edx		# zero error code
 	movl %esp,%eax		# pt_regs pointer
 	call do_nmi
-	RESTORE_ALL
+	jmp restore_all
 
 nmi_stack_fixup:
 	FIX_STACK(12,nmi_stack_correct, 1)
@@ -508,6 +566,29 @@
 	FIX_STACK(24,nmi_stack_correct, 1)
 	jmp nmi_stack_correct
 
+nmi_16bit_stack:
+	/* create the pointer to lss back */
+	pushl %ss
+	pushl %esp
+	andl $0xffff, %esp
+	addw $4, (%esp)
+	/* copy the iret frame of 12 bytes */
+	.rept 3
+	pushl 16(%esp)
+	.endr
+	pushl %eax
+	SAVE_ALL
+	FIXUP_ESPFIX_STACK		# %eax == %esp
+	xorl %edx,%edx			# zero error code
+	call do_nmi
+	RESTORE_REGS
+	lss 12+4(%esp), %esp		# back to 16bit stack
+1:	iret
+.section __ex_table,"a"
+	.align 4
+	.long 1b,iret_exc
+.previous
+
 ENTRY(int3)
 	pushl $-1			# mark this as an int
 	SAVE_ALL
diff -urN linux-2.6.10/arch/i386/kernel/head.S linux-2.6.10-stk/arch/i386/kernel/head.S
--- linux-2.6.10/arch/i386/kernel/head.S	2004-10-21 21:35:21.000000000 +0400
+++ linux-2.6.10-stk/arch/i386/kernel/head.S	2005-01-01 13:02:39.000000000 +0300
@@ -514,7 +514,7 @@
 	.quad 0x00009a0000000000	/* 0xc0 APM CS 16 code (16 bit) */
 	.quad 0x0040920000000000	/* 0xc8 APM DS    data */
 
-	.quad 0x0000000000000000	/* 0xd0 - unused */
+	.quad 0x0000920000000000	/* 0xd0 - ESPFIX 16-bit SS */
 	.quad 0x0000000000000000	/* 0xd8 - unused */
 	.quad 0x0000000000000000	/* 0xe0 - unused */
 	.quad 0x0000000000000000	/* 0xe8 - unused */
diff -urN linux-2.6.10/arch/i386/kernel/traps.c linux-2.6.10-stk/arch/i386/kernel/traps.c
--- linux-2.6.10/arch/i386/kernel/traps.c	2004-12-26 00:37:31.000000000 +0300
+++ linux-2.6.10-stk/arch/i386/kernel/traps.c	2004-12-31 11:27:02.000000000 +0300
@@ -904,6 +904,51 @@
 #endif
 }
 
+fastcall void setup_x86_bogus_stack(unsigned char * stk)
+{
+	unsigned long *switch16_ptr, *switch32_ptr;
+	struct pt_regs *regs;
+	unsigned long stack_top, stack_bot;
+	unsigned short iret_frame16_off;
+	int cpu = smp_processor_id();
+	/* reserve the space on 32bit stack for the magic switch16 pointer */
+	memmove(stk, stk + 8, sizeof(struct pt_regs));
+	switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
+	regs = (struct pt_regs *)stk;
+	/* now the switch32 on 16bit stack */
+	stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
+	stack_top = stack_bot +	CPU_16BIT_STACK_SIZE;
+	switch32_ptr = (unsigned long *)(stack_top - 8);
+	iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
+	/* copy iret frame on 16bit stack */
+	memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
+	/* fill in the switch pointers */
+	switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
+	switch16_ptr[1] = __ESPFIX_SS;
+	switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
+		8 - CPU_16BIT_STACK_SIZE;
+	switch32_ptr[1] = __KERNEL_DS;
+}
+
+fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
+{
+	unsigned long *switch32_ptr;
+	unsigned char *stack16, *stack32;
+	unsigned long stack_top, stack_bot;
+	int len;
+	int cpu = smp_processor_id();
+	stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
+	stack_top = stack_bot +	CPU_16BIT_STACK_SIZE;
+	switch32_ptr = (unsigned long *)(stack_top - 8);
+	/* copy the data from 16bit stack to 32bit stack */
+	len = CPU_16BIT_STACK_SIZE - 8 - sp;
+	stack16 = (unsigned char *)(stack_bot + sp);
+	stack32 = (unsigned char *)
+		(switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
+	memcpy(stack32, stack16, len);
+	return stack32;
+}
+
 /*
  *  'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
diff -urN linux-2.6.10/include/asm-i386/desc.h linux-2.6.10-stk/include/asm-i386/desc.h
--- linux-2.6.10/include/asm-i386/desc.h	2004-10-21 21:35:55.000000000 +0400
+++ linux-2.6.10-stk/include/asm-i386/desc.h	2005-01-01 13:02:39.000000000 +0300
@@ -4,6 +4,8 @@
 #include <asm/ldt.h>
 #include <asm/segment.h>
 
+#define CPU_16BIT_STACK_SIZE 1024
+
 #ifndef __ASSEMBLY__
 
 #include <linux/preempt.h>
@@ -15,6 +17,9 @@
 extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
 DECLARE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
 
+extern unsigned char cpu_16bit_stack[CPU_16BIT_STACK_SIZE];
+DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
+
 struct Xgt_desc_struct {
 	unsigned short size;
 	unsigned long address __attribute__((packed));
diff -urN linux-2.6.10/include/asm-i386/segment.h linux-2.6.10-stk/include/asm-i386/segment.h
--- linux-2.6.10/include/asm-i386/segment.h	2004-01-09 09:59:19.000000000 +0300
+++ linux-2.6.10-stk/include/asm-i386/segment.h	2005-01-01 13:02:39.000000000 +0300
@@ -38,7 +38,7 @@
  *  24 - APM BIOS support
  *  25 - APM BIOS support 
  *
- *  26 - unused
+ *  26 - ESPFIX small SS
  *  27 - unused
  *  28 - unused
  *  29 - unused
@@ -71,6 +71,9 @@
 #define GDT_ENTRY_PNPBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 6)
 #define GDT_ENTRY_APMBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 11)
 
+#define GDT_ENTRY_ESPFIX_SS		(GDT_ENTRY_KERNEL_BASE + 14)
+#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
+
 #define GDT_ENTRY_DOUBLEFAULT_TSS	31
 
 /*

  reply	other threads:[~2005-01-04  1:58 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-01-03 23:39 [patch] x86: fix ESP corruption CPU bug Stas Sergeev
2005-01-04  0:01 ` Linus Torvalds
2005-01-04  1:58   ` Stas Sergeev [this message]
  -- strict thread matches above, loose matches on Subject: below --
2005-03-13 18:20 Stas Sergeev
2005-03-13 18:52 ` Grzegorz Kulewski
2005-03-13 19:11   ` Stas Sergeev
2005-03-13 19:37     ` Ondrej Zary
2005-03-13 19:46       ` Stas Sergeev
2005-03-13 20:02   ` Pavel Machek
2005-03-13 20:10 ` Pavel Machek
2005-03-13 20:55   ` Stas Sergeev
2005-03-13 21:13     ` Linus Torvalds
2005-03-13 23:17       ` Pavel Machek
2005-03-13 23:54         ` Linus Torvalds
2005-03-14  0:16       ` Linus Torvalds
2005-03-14  4:52         ` Stas Sergeev
2005-03-14  9:34           ` Andi Kleen
2005-03-14 15:21             ` Jakob Eriksson
2005-03-14 17:03               ` linux-os
2005-03-14 17:10                 ` Pavel Machek
2005-03-14 19:24                 ` Brian Gerst
2005-03-14 20:21                   ` Stas Sergeev
2005-03-14 18:02               ` Stas Sergeev
2005-03-14 17:29             ` Stas Sergeev
2005-03-14 11:10 Zoltan Boszormenyi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=41D9F84A.6090402@aknet.ru \
    --to=stsp@aknet.ru \
    --cc=VANDROVE@vc.cvut.cz \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.