public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] double fault enhancements
@ 2006-01-30  9:54 Jan Beulich
  2006-01-30  9:58 ` Arjan van de Ven
  2006-01-30 10:08 ` (correction) " Jan Beulich
  0 siblings, 2 replies; 5+ messages in thread
From: Jan Beulich @ 2006-01-30  9:54 UTC (permalink / raw)
  To: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 662 bytes --]

From: Jan Beulich <jbeulich@novell.com>

Make the double fault handler use CPU-specific stacks. Add some
abstraction to simplify future change of other exception handlers to go
through task gates. Change the pointer validity checks in the double
fault handler to account for the fact that both GDT and TSS aren't in
static kernel space anymore. Add a new notification of the event
through the die notifier chain, also providing some environmental
adjustments so that various infrastructural things work independent of
the fact that the fault and the callbacks are running on other than the
normal kernel stack.

Signed-Off-By: Jan Beulich <jbeulich@novell.com>


[-- Attachment #2: linux-2.6.16-rc1-i386-doublefault.patch --]
[-- Type: text/plain, Size: 11742 bytes --]

From: Jan Beulich <jbeulich@novell.com>

Make the double fault handler use CPU-specific stacks. Add some
abstraction to simplify future change of other exception handlers to go
through task gates. Change the pointer validity checks in the double
fault handler to account for the fact that both GDT and TSS aren't in
static kernel space anymore. Add a new notification of the event
through the die notifier chain, also providing some environmental
adjustments so that various infrastructural things work independent of
the fact that the fault and the callbacks are running on other than the
normal kernel stack.

Signed-Off-By: Jan Beulich <jbeulich@novell.com>

diff -Npru /home/jbeulich/tmp/linux-2.6.16-rc1/arch/i386/kernel/cpu/common.c 2.6.16-rc1-i386-doublefault/arch/i386/kernel/cpu/common.c
--- /home/jbeulich/tmp/linux-2.6.16-rc1/arch/i386/kernel/cpu/common.c	2006-01-18 12:38:24.000000000 +0100
+++ 2.6.16-rc1-i386-doublefault/arch/i386/kernel/cpu/common.c	2006-01-25 11:15:51.000000000 +0100
@@ -4,6 +4,7 @@
 #include <linux/smp.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
+#include <linux/bootmem.h>
 #include <asm/semaphore.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
@@ -560,6 +561,7 @@ void __init early_cpu_init(void)
 void __devinit cpu_init(void)
 {
 	int cpu = smp_processor_id();
+	unsigned i;
 	struct tss_struct * t = &per_cpu(init_tss, cpu);
 	struct thread_struct *thread = &current->thread;
 	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
@@ -612,9 +614,54 @@ void __devinit cpu_init(void)
 	load_TR_desc();
 	load_LDT(&init_mm.context);
 
-#ifdef CONFIG_DOUBLEFAULT
-	/* Set up doublefault TSS pointer in the GDT */
-	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
+#ifdef N_EXCEPTION_TSS
+# if EXCEPTION_STACK_ORDER > THREAD_ORDER
+#  error Assertion failed: EXCEPTION_STACK_ORDER <= THREAD_ORDER
+# endif
+	for (i = 0; i < N_EXCEPTION_TSS; ++i) {
+		unsigned long stack;
+
+		/* Set up exception handling TSS */
+		exception_tss[cpu][i].ebx = (unsigned long)&exception_tss[cpu][i];
+
+		/* Set up exception handling stacks */
+# ifdef CONFIG_SMP
+		if (cpu) {
+			stack = __get_free_pages(GFP_ATOMIC, THREAD_ORDER);
+			if (!stack)
+				panic("Cannot allocate exception stack %u %d\n",
+				      i,
+				      cpu);
+		}
+		else
+# endif
+			stack = (unsigned long)__alloc_bootmem(EXCEPTION_STKSZ,
+			                                       THREAD_SIZE,
+			                                       __pa(MAX_DMA_ADDRESS));
+		stack += EXCEPTION_STKSZ;
+		exception_tss[cpu][i].esp = exception_tss[cpu][i].esp0 = stack;
+# ifdef CONFIG_SMP
+		if (cpu) {
+			unsigned j;
+
+			for (j = EXCEPTION_STACK_ORDER; j < THREAD_ORDER; ++j) {
+				/* set_page_refs sets the page count only for the first
+				   page, but since we split the larger-order page here,
+				   we need to adjust the page count before freeing the
+				   pieces. */
+				struct page * page = virt_to_page((void *)stack);
+
+				BUG_ON(page_count(page));
+				set_page_count(page, 1);
+				free_pages(stack, j);
+				stack += (PAGE_SIZE << j);
+			}
+		}
+# endif
+
+		/* Set up exception handling TSS pointer in the GDT */
+		__set_tss_desc(cpu, GDT_ENTRY_EXCEPTION_TSS + i, &exception_tss[cpu][i]);
+	}
 #endif
 
 	/* Clear %fs and %gs. */
diff -Npru /home/jbeulich/tmp/linux-2.6.16-rc1/arch/i386/kernel/doublefault.c 2.6.16-rc1-i386-doublefault/arch/i386/kernel/doublefault.c
--- /home/jbeulich/tmp/linux-2.6.16-rc1/arch/i386/kernel/doublefault.c	2006-01-03 04:21:10.000000000 +0100
+++ 2.6.16-rc1-i386-doublefault/arch/i386/kernel/doublefault.c	2006-01-25 11:36:53.000000000 +0100
@@ -8,58 +8,81 @@
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/desc.h>
+#include <asm/kdebug.h>
 
-#define DOUBLEFAULT_STACKSIZE (1024)
-static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
-#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
+extern unsigned long max_low_pfn;
+#define ptr_ok(x, l) ((x) >= PAGE_OFFSET \
+                      && (x) + (l) <= PAGE_OFFSET + max_low_pfn * PAGE_SIZE - 1)
 
-#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + 0x1000000)
+#define THREAD_INFO_FROM(x) ((struct thread_info *)((x) & ~(THREAD_SIZE - 1)))
 
-static void doublefault_fn(void)
+register const struct tss_struct *self __asm__("ebx");
+
+void doublefault_fn(void)
 {
-	struct Xgt_desc_struct gdt_desc = {0, 0};
+	struct Xgt_desc_struct gdt_desc;
 	unsigned long gdt, tss;
 
 	store_gdt(&gdt_desc);
 	gdt = gdt_desc.address;
 
-	printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
+	printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size + 1);
 
-	if (ptr_ok(gdt)) {
+	if (ptr_ok(gdt, gdt_desc.size)) {
 		gdt += GDT_ENTRY_TSS << 3;
 		tss = *(u16 *)(gdt+2);
 		tss += *(u8 *)(gdt+4) << 16;
 		tss += *(u8 *)(gdt+7) << 24;
 		printk("double fault, tss at %08lx\n", tss);
 
-		if (ptr_ok(tss)) {
-			struct tss_struct *t = (struct tss_struct *)tss;
+		if (ptr_ok(tss, *(u16 *)gdt)) {
+			const struct tss_struct *t = (struct tss_struct *)tss;
+			struct {
+				struct pt_regs common;
+				struct {
+					unsigned long es;
+					unsigned long ds;
+					unsigned long fs;
+					unsigned long gs;
+				} vm86;
+			} regs;
+
+			/* for current/current_thread_info to work... */
+			*THREAD_INFO_FROM(self->esp) = *THREAD_INFO_FROM(t->esp0 - 1);
 
 			printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp);
 
 			printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
 				t->eax, t->ebx, t->ecx, t->edx);
-			printk("esi = %08lx, edi = %08lx\n",
-				t->esi, t->edi);
+			printk("esi = %08lx, edi = %08lx, ebp = %08lx\n",
+				t->esi, t->edi, t->ebp);
+
+			regs.common.ebx = t->ebx;
+			regs.common.ecx = t->ecx;
+			regs.common.edx = t->edx;
+			regs.common.esi = t->esi;
+			regs.common.edi = t->edi;
+			regs.common.ebp = t->ebp;
+			regs.common.eax = t->eax;
+			regs.common.xds = t->ds;
+			regs.common.xes = t->es;
+			regs.common.orig_eax = -1;
+			regs.common.eip = t->eip;
+			regs.common.xcs = t->cs;
+			regs.common.eflags = t->eflags;
+			regs.common.esp = t->esp;
+			regs.common.xss = t->ss;
+			if (t->eflags & X86_EFLAGS_VM) {
+				regs.common.xds = 0;
+				regs.common.xes = 0;
+				regs.vm86.es = t->es;
+				regs.vm86.ds = t->ds;
+				regs.vm86.fs = t->fs;
+				regs.vm86.gs = t->gs;
+			}
+			notify_die(DIE_DOUBLE_FAULT, "double fault", &regs.common, 0, 8, SIGKILL);
 		}
 	}
 
 	for (;;) /* nothing */;
 }
-
-struct tss_struct doublefault_tss __cacheline_aligned = {
-	.esp0		= STACK_START,
-	.ss0		= __KERNEL_DS,
-	.ldt		= 0,
-	.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,
-
-	.eip		= (unsigned long) doublefault_fn,
-	.eflags		= X86_EFLAGS_SF | 0x2,	/* 0x2 bit is always set */
-	.esp		= STACK_START,
-	.es		= __USER_DS,
-	.cs		= __KERNEL_CS,
-	.ss		= __KERNEL_DS,
-	.ds		= __USER_DS,
-
-	.__cr3		= __pa(swapper_pg_dir)
-};
diff -Npru /home/jbeulich/tmp/linux-2.6.16-rc1/arch/i386/kernel/traps.c 2.6.16-rc1-i386-doublefault/arch/i386/kernel/traps.c
--- /home/jbeulich/tmp/linux-2.6.16-rc1/arch/i386/kernel/traps.c	2006-01-18 12:38:24.000000000 +0100
+++ 2.6.16-rc1-i386-doublefault/arch/i386/kernel/traps.c	2006-01-26 17:24:15.000000000 +0100
@@ -61,6 +61,26 @@ asmlinkage int system_call(void);
 struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
 		{ 0, 0 }, { 0, 0 } };
 
+void doublefault_fn(void);
+
+#ifdef N_EXCEPTION_TSS
+struct tss_struct exception_tss[NR_CPUS][N_EXCEPTION_TSS] __cacheline_aligned = {
+	[0 ... NR_CPUS-1] = {
+		[0 ... N_EXCEPTION_TSS-1] = {
+			.cs       = __KERNEL_CS,
+			.ss       = __KERNEL_DS,
+			.ss0      = __KERNEL_DS,
+			.__cr3    = __pa(swapper_pg_dir),
+			.io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+			.ds       = __USER_DS,
+			.es       = __USER_DS,
+			.eflags	  = X86_EFLAGS_SF | 0x2, /* 0x2 bit is always set */
+		},
+		[DOUBLEFAULT_TSS].eip = (unsigned long)doublefault_fn
+	}
+};
+#endif
+
 /* Do we ignore FPU interrupts ? */
 char ignore_fpu_irq = 0;
 
@@ -1111,7 +1131,9 @@ void __init trap_init(void)
 	set_trap_gate(5,&bounds);
 	set_trap_gate(6,&invalid_op);
 	set_trap_gate(7,&device_not_available);
-	set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS);
+#ifdef DOUBLEFAULT_TSS
+	set_task_gate(8,GDT_ENTRY_EXCEPTION_TSS + DOUBLEFAULT_TSS);
+#endif
 	set_trap_gate(9,&coprocessor_segment_overrun);
 	set_trap_gate(10,&invalid_TSS);
 	set_trap_gate(11,&segment_not_present);
diff -Npru /home/jbeulich/tmp/linux-2.6.16-rc1/include/asm-i386/kdebug.h 2.6.16-rc1-i386-doublefault/include/asm-i386/kdebug.h
--- /home/jbeulich/tmp/linux-2.6.16-rc1/include/asm-i386/kdebug.h	2006-01-03 04:21:10.000000000 +0100
+++ 2.6.16-rc1-i386-doublefault/include/asm-i386/kdebug.h	2006-01-27 16:29:53.000000000 +0100
@@ -39,6 +39,7 @@ enum die_val {
 	DIE_CALL,
 	DIE_NMI_IPI,
 	DIE_PAGE_FAULT,
+	DIE_DOUBLE_FAULT
 };
 
 static inline int notify_die(enum die_val val, const char *str,
diff -Npru /home/jbeulich/tmp/linux-2.6.16-rc1/include/asm-i386/processor.h 2.6.16-rc1-i386-doublefault/include/asm-i386/processor.h
--- /home/jbeulich/tmp/linux-2.6.16-rc1/include/asm-i386/processor.h	2006-01-18 12:39:04.000000000 +0100
+++ 2.6.16-rc1-i386-doublefault/include/asm-i386/processor.h	2006-01-25 17:08:53.000000000 +0100
@@ -90,7 +90,9 @@ struct cpuinfo_x86 {
 
 extern struct cpuinfo_x86 boot_cpu_data;
 extern struct cpuinfo_x86 new_cpu_data;
-extern struct tss_struct doublefault_tss;
+#ifdef N_EXCEPTION_TSS
+extern struct tss_struct exception_tss[NR_CPUS][N_EXCEPTION_TSS];
+#endif
 DECLARE_PER_CPU(struct tss_struct, init_tss);
 
 #ifdef CONFIG_SMP
@@ -486,6 +488,13 @@ struct thread_struct {
 	.io_bitmap	= { [ 0 ... IO_BITMAP_LONGS] = ~0 },		\
 }
 
+#ifndef CONFIG_NLKD_FTA
+# define EXCEPTION_STACK_ORDER 0
+#else
+# define EXCEPTION_STACK_ORDER THREAD_ORDER
+#endif
+#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
+
 static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
 {
 	tss->esp0 = thread->esp0;
diff -Npru /home/jbeulich/tmp/linux-2.6.16-rc1/include/asm-i386/segment.h 2.6.16-rc1-i386-doublefault/include/asm-i386/segment.h
--- /home/jbeulich/tmp/linux-2.6.16-rc1/include/asm-i386/segment.h	2006-01-18 12:39:04.000000000 +0100
+++ 2.6.16-rc1-i386-doublefault/include/asm-i386/segment.h	2006-01-25 17:09:55.000000000 +0100
@@ -43,7 +43,8 @@
  *  28 - unused
  *  29 - unused
  *  30 - unused
- *  31 - TSS for double fault handler
+ *  31 - TSS for first exception handler (double fault)
+ *  32+  TSSes for further exception handlers
  */
 #define GDT_ENTRY_TLS_ENTRIES	3
 #define GDT_ENTRY_TLS_MIN	6
@@ -74,12 +75,18 @@
 #define GDT_ENTRY_ESPFIX_SS		(GDT_ENTRY_KERNEL_BASE + 14)
 #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
 
-#define GDT_ENTRY_DOUBLEFAULT_TSS	31
+#define GDT_ENTRY_EXCEPTION_TSS	31
+#ifdef CONFIG_DOUBLEFAULT
+# define DOUBLEFAULT_TSS 0
+# define N_EXCEPTION_TSS 1
+#else
+# undef GDT_ENTRY_EXCEPTION_TSS
+#endif
 
 /*
- * The GDT has 32 entries
+ * The GDT has 32+ entries
  */
-#define GDT_ENTRIES 32
+#define GDT_ENTRIES (31 + N_EXCEPTION_TSS)
 
 #define GDT_SIZE (GDT_ENTRIES * 8)
 
diff -Npru /home/jbeulich/tmp/linux-2.6.16-rc1/include/asm-i386/thread_info.h 2.6.16-rc1-i386-doublefault/include/asm-i386/thread_info.h
--- /home/jbeulich/tmp/linux-2.6.16-rc1/include/asm-i386/thread_info.h	2006-01-18 12:39:04.000000000 +0100
+++ 2.6.16-rc1-i386-doublefault/include/asm-i386/thread_info.h	2006-01-25 10:41:49.000000000 +0100
@@ -54,10 +54,11 @@ struct thread_info {
 
 #define PREEMPT_ACTIVE		0x10000000
 #ifdef CONFIG_4KSTACKS
-#define THREAD_SIZE            (4096)
+#define THREAD_ORDER 0
 #else
-#define THREAD_SIZE		(8192)
+#define THREAD_ORDER 1
 #endif
+#define THREAD_SIZE (4096 << THREAD_ORDER)
 
 #define STACK_WARN             (THREAD_SIZE/8)
 /*

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] double fault enhancements
  2006-01-30  9:54 [PATCH] double fault enhancements Jan Beulich
@ 2006-01-30  9:58 ` Arjan van de Ven
  2006-01-30 10:12   ` Jan Beulich
  2006-01-30 10:08 ` (correction) " Jan Beulich
  1 sibling, 1 reply; 5+ messages in thread
From: Arjan van de Ven @ 2006-01-30  9:58 UTC (permalink / raw)
  To: Jan Beulich; +Cc: linux-kernel

On Mon, 2006-01-30 at 10:54 +0100, Jan Beulich wrote:
> From: Jan Beulich <jbeulich@novell.com>
> 
> Make the double fault handler use CPU-specific stacks. Add some
> abstraction to simplify future change of other exception handlers to go
> through task gates. Change the pointer validity checks in the double
> fault handler to account for the fact that both GDT and TSS aren't in
> static kernel space anymore. Add a new notification of the event
> through the die notifier chain, also providing some environmental
> adjustments so that various infrastructural things work independent of
> the fact that the fault and the callbacks are running on other than the
> normal kernel stack.

(the way you sent this patch means that it's not possible to reply
inline to the patch. Please fix your mailer for this)

I would hope TSS and such remain in the kernel static space, because
those are the kind of things I'd like to be read only over time...

Also the last chunk of your patch has nothing to do with what you
describe here... and seems sort of spurious. (it might be a useful
cleanup, but it should be independent)


^ permalink raw reply	[flat|nested] 5+ messages in thread

* (correction) [PATCH] double fault enhancements
  2006-01-30  9:54 [PATCH] double fault enhancements Jan Beulich
  2006-01-30  9:58 ` Arjan van de Ven
@ 2006-01-30 10:08 ` Jan Beulich
  1 sibling, 0 replies; 5+ messages in thread
From: Jan Beulich @ 2006-01-30 10:08 UTC (permalink / raw)
  To: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 863 bytes --]

I'm sorry for having to resend this, but I mistakenly sent an older version of the patch. The correct one is attached
now, fixing a warning and a build problem when CONFIG_DOUBLEFAULT is not defined.

From: Jan Beulich <jbeulich@novell.com>

Make the double fault handler use CPU-specific stacks. Add some
abstraction to simplify future change of other exception handlers to go
through task gates. Change the pointer validity checks in the double
fault handler to account for the fact that both GDT and TSS aren't in
static kernel space anymore. Add a new notification of the event
through the die notifier chain, also providing some environmental
adjustments so that various infrastructural things work independent of
the fact that the fault and the callbacks are running on other than the
normal kernel stack.

Signed-Off-By: Jan Beulich <jbeulich@novell.com>


[-- Attachment #2: linux-2.6.16-rc1-i386-doublefault.patch --]
[-- Type: text/plain, Size: 12117 bytes --]

From: Jan Beulich <jbeulich@novell.com>

Make the double fault handler use CPU-specific stacks. Add some
abstraction to simplify future change of other exception handlers to go
through task gates. Change the pointer validity checks in the double
fault handler to account for the fact that both GDT and TSS aren't in
static kernel space anymore. Add a new notification of the event
through the die notifier chain, also providing some environmental
adjustments so that various infrastructural things work independent of
the fact that the fault and the callbacks are running on other than the
normal kernel stack.

Signed-Off-By: Jan Beulich <jbeulich@novell.com>

diff -Npru /home/jbeulich/tmp/linux-2.6.16-rc1/arch/i386/kernel/cpu/common.c 2.6.16-rc1-i386-doublefault/arch/i386/kernel/cpu/common.c
--- /home/jbeulich/tmp/linux-2.6.16-rc1/arch/i386/kernel/cpu/common.c	2006-01-18 12:38:24.000000000 +0100
+++ 2.6.16-rc1-i386-doublefault/arch/i386/kernel/cpu/common.c	2006-01-25 11:15:51.000000000 +0100
@@ -4,6 +4,7 @@
 #include <linux/smp.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
+#include <linux/bootmem.h>
 #include <asm/semaphore.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
@@ -560,6 +561,7 @@ void __init early_cpu_init(void)
 void __devinit cpu_init(void)
 {
 	int cpu = smp_processor_id();
+	unsigned i;
 	struct tss_struct * t = &per_cpu(init_tss, cpu);
 	struct thread_struct *thread = &current->thread;
 	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
@@ -612,9 +614,54 @@ void __devinit cpu_init(void)
 	load_TR_desc();
 	load_LDT(&init_mm.context);
 
-#ifdef CONFIG_DOUBLEFAULT
-	/* Set up doublefault TSS pointer in the GDT */
-	__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
+#ifdef N_EXCEPTION_TSS
+# if EXCEPTION_STACK_ORDER > THREAD_ORDER
+#  error Assertion failed: EXCEPTION_STACK_ORDER <= THREAD_ORDER
+# endif
+	for (i = 0; i < N_EXCEPTION_TSS; ++i) {
+		unsigned long stack;
+
+		/* Set up exception handling TSS */
+		exception_tss[cpu][i].ebx = (unsigned long)&exception_tss[cpu][i];
+
+		/* Set up exception handling stacks */
+# ifdef CONFIG_SMP
+		if (cpu) {
+			stack = __get_free_pages(GFP_ATOMIC, THREAD_ORDER);
+			if (!stack)
+				panic("Cannot allocate exception stack %u %d\n",
+				      i,
+				      cpu);
+		}
+		else
+# endif
+			stack = (unsigned long)__alloc_bootmem(EXCEPTION_STKSZ,
+			                                       THREAD_SIZE,
+			                                       __pa(MAX_DMA_ADDRESS));
+		stack += EXCEPTION_STKSZ;
+		exception_tss[cpu][i].esp = exception_tss[cpu][i].esp0 = stack;
+# ifdef CONFIG_SMP
+		if (cpu) {
+			unsigned j;
+
+			for (j = EXCEPTION_STACK_ORDER; j < THREAD_ORDER; ++j) {
+				/* set_page_refs sets the page count only for the first
+				   page, but since we split the larger-order page here,
+				   we need to adjust the page count before freeing the
+				   pieces. */
+				struct page * page = virt_to_page((void *)stack);
+
+				BUG_ON(page_count(page));
+				set_page_count(page, 1);
+				free_pages(stack, j);
+				stack += (PAGE_SIZE << j);
+			}
+		}
+# endif
+
+		/* Set up exception handling TSS pointer in the GDT */
+		__set_tss_desc(cpu, GDT_ENTRY_EXCEPTION_TSS + i, &exception_tss[cpu][i]);
+	}
 #endif
 
 	/* Clear %fs and %gs. */
diff -Npru /home/jbeulich/tmp/linux-2.6.16-rc1/arch/i386/kernel/doublefault.c 2.6.16-rc1-i386-doublefault/arch/i386/kernel/doublefault.c
--- /home/jbeulich/tmp/linux-2.6.16-rc1/arch/i386/kernel/doublefault.c	2006-01-03 04:21:10.000000000 +0100
+++ 2.6.16-rc1-i386-doublefault/arch/i386/kernel/doublefault.c	2006-01-25 11:36:53.000000000 +0100
@@ -8,58 +8,81 @@
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/desc.h>
+#include <asm/kdebug.h>
 
-#define DOUBLEFAULT_STACKSIZE (1024)
-static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
-#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
+extern unsigned long max_low_pfn;
+#define ptr_ok(x, l) ((x) >= PAGE_OFFSET \
+                      && (x) + (l) <= PAGE_OFFSET + max_low_pfn * PAGE_SIZE - 1)
 
-#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + 0x1000000)
+#define THREAD_INFO_FROM(x) ((struct thread_info *)((x) & ~(THREAD_SIZE - 1)))
 
-static void doublefault_fn(void)
+register const struct tss_struct *self __asm__("ebx");
+
+void doublefault_fn(void)
 {
-	struct Xgt_desc_struct gdt_desc = {0, 0};
+	struct Xgt_desc_struct gdt_desc;
 	unsigned long gdt, tss;
 
 	store_gdt(&gdt_desc);
 	gdt = gdt_desc.address;
 
-	printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
+	printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size + 1);
 
-	if (ptr_ok(gdt)) {
+	if (ptr_ok(gdt, gdt_desc.size)) {
 		gdt += GDT_ENTRY_TSS << 3;
 		tss = *(u16 *)(gdt+2);
 		tss += *(u8 *)(gdt+4) << 16;
 		tss += *(u8 *)(gdt+7) << 24;
 		printk("double fault, tss at %08lx\n", tss);
 
-		if (ptr_ok(tss)) {
-			struct tss_struct *t = (struct tss_struct *)tss;
+		if (ptr_ok(tss, *(u16 *)gdt)) {
+			const struct tss_struct *t = (struct tss_struct *)tss;
+			struct {
+				struct pt_regs common;
+				struct {
+					unsigned long es;
+					unsigned long ds;
+					unsigned long fs;
+					unsigned long gs;
+				} vm86;
+			} regs;
+
+			/* for current/current_thread_info to work... */
+			*THREAD_INFO_FROM(self->esp) = *THREAD_INFO_FROM(t->esp0 - 1);
 
 			printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp);
 
 			printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
 				t->eax, t->ebx, t->ecx, t->edx);
-			printk("esi = %08lx, edi = %08lx\n",
-				t->esi, t->edi);
+			printk("esi = %08lx, edi = %08lx, ebp = %08lx\n",
+				t->esi, t->edi, t->ebp);
+
+			regs.common.ebx = t->ebx;
+			regs.common.ecx = t->ecx;
+			regs.common.edx = t->edx;
+			regs.common.esi = t->esi;
+			regs.common.edi = t->edi;
+			regs.common.ebp = t->ebp;
+			regs.common.eax = t->eax;
+			regs.common.xds = t->ds;
+			regs.common.xes = t->es;
+			regs.common.orig_eax = -1;
+			regs.common.eip = t->eip;
+			regs.common.xcs = t->cs;
+			regs.common.eflags = t->eflags;
+			regs.common.esp = t->esp;
+			regs.common.xss = t->ss;
+			if (t->eflags & X86_EFLAGS_VM) {
+				regs.common.xds = 0;
+				regs.common.xes = 0;
+				regs.vm86.es = t->es;
+				regs.vm86.ds = t->ds;
+				regs.vm86.fs = t->fs;
+				regs.vm86.gs = t->gs;
+			}
+			notify_die(DIE_DOUBLE_FAULT, "double fault", &regs.common, 0, 8, SIGKILL);
 		}
 	}
 
 	for (;;) /* nothing */;
 }
-
-struct tss_struct doublefault_tss __cacheline_aligned = {
-	.esp0		= STACK_START,
-	.ss0		= __KERNEL_DS,
-	.ldt		= 0,
-	.io_bitmap_base	= INVALID_IO_BITMAP_OFFSET,
-
-	.eip		= (unsigned long) doublefault_fn,
-	.eflags		= X86_EFLAGS_SF | 0x2,	/* 0x2 bit is always set */
-	.esp		= STACK_START,
-	.es		= __USER_DS,
-	.cs		= __KERNEL_CS,
-	.ss		= __KERNEL_DS,
-	.ds		= __USER_DS,
-
-	.__cr3		= __pa(swapper_pg_dir)
-};
diff -Npru /home/jbeulich/tmp/linux-2.6.16-rc1/arch/i386/kernel/traps.c 2.6.16-rc1-i386-doublefault/arch/i386/kernel/traps.c
--- /home/jbeulich/tmp/linux-2.6.16-rc1/arch/i386/kernel/traps.c	2006-01-18 12:38:24.000000000 +0100
+++ 2.6.16-rc1-i386-doublefault/arch/i386/kernel/traps.c	2006-01-30 09:58:51.104384192 +0100
@@ -61,6 +61,26 @@ asmlinkage int system_call(void);
 struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
 		{ 0, 0 }, { 0, 0 } };
 
+void doublefault_fn(void);
+
+#ifdef N_EXCEPTION_TSS
+struct tss_struct exception_tss[NR_CPUS][N_EXCEPTION_TSS] __cacheline_aligned = {
+	[0 ... NR_CPUS-1] = {
+		[0 ... N_EXCEPTION_TSS-1] = {
+			.cs       = __KERNEL_CS,
+			.ss       = __KERNEL_DS,
+			.ss0      = __KERNEL_DS,
+			.__cr3    = __pa(swapper_pg_dir),
+			.io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+			.ds       = __USER_DS,
+			.es       = __USER_DS,
+			.eflags	  = X86_EFLAGS_SF | 0x2, /* 0x2 bit is always set */
+		},
+		[DOUBLEFAULT_TSS].eip = (unsigned long)doublefault_fn
+	}
+};
+#endif
+
 /* Do we ignore FPU interrupts ? */
 char ignore_fpu_irq = 0;
 
@@ -1083,10 +1103,12 @@ static void __init set_system_gate(unsig
 	_set_gate(idt_table+n,15,3,addr,__KERNEL_CS);
 }
 
+#ifdef N_EXCEPTION_TSS
 static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
 {
 	_set_gate(idt_table+n,5,0,0,(gdt_entry<<3));
 }
+#endif
 
 
 void __init trap_init(void)
@@ -1111,7 +1133,9 @@ void __init trap_init(void)
 	set_trap_gate(5,&bounds);
 	set_trap_gate(6,&invalid_op);
 	set_trap_gate(7,&device_not_available);
-	set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS);
+#ifdef DOUBLEFAULT_TSS
+	set_task_gate(8,GDT_ENTRY_EXCEPTION_TSS + DOUBLEFAULT_TSS);
+#endif
 	set_trap_gate(9,&coprocessor_segment_overrun);
 	set_trap_gate(10,&invalid_TSS);
 	set_trap_gate(11,&segment_not_present);
diff -Npru /home/jbeulich/tmp/linux-2.6.16-rc1/include/asm-i386/kdebug.h 2.6.16-rc1-i386-doublefault/include/asm-i386/kdebug.h
--- /home/jbeulich/tmp/linux-2.6.16-rc1/include/asm-i386/kdebug.h	2006-01-03 04:21:10.000000000 +0100
+++ 2.6.16-rc1-i386-doublefault/include/asm-i386/kdebug.h	2006-01-27 16:29:53.000000000 +0100
@@ -39,6 +39,7 @@ enum die_val {
 	DIE_CALL,
 	DIE_NMI_IPI,
 	DIE_PAGE_FAULT,
+	DIE_DOUBLE_FAULT
 };
 
 static inline int notify_die(enum die_val val, const char *str,
diff -Npru /home/jbeulich/tmp/linux-2.6.16-rc1/include/asm-i386/processor.h 2.6.16-rc1-i386-doublefault/include/asm-i386/processor.h
--- /home/jbeulich/tmp/linux-2.6.16-rc1/include/asm-i386/processor.h	2006-01-18 12:39:04.000000000 +0100
+++ 2.6.16-rc1-i386-doublefault/include/asm-i386/processor.h	2006-01-25 17:08:53.000000000 +0100
@@ -90,7 +90,9 @@ struct cpuinfo_x86 {
 
 extern struct cpuinfo_x86 boot_cpu_data;
 extern struct cpuinfo_x86 new_cpu_data;
-extern struct tss_struct doublefault_tss;
+#ifdef N_EXCEPTION_TSS
+extern struct tss_struct exception_tss[NR_CPUS][N_EXCEPTION_TSS];
+#endif
 DECLARE_PER_CPU(struct tss_struct, init_tss);
 
 #ifdef CONFIG_SMP
@@ -486,6 +488,13 @@ struct thread_struct {
 	.io_bitmap	= { [ 0 ... IO_BITMAP_LONGS] = ~0 },		\
 }
 
+#ifndef CONFIG_NLKD_FTA
+# define EXCEPTION_STACK_ORDER 0
+#else
+# define EXCEPTION_STACK_ORDER THREAD_ORDER
+#endif
+#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
+
 static inline void load_esp0(struct tss_struct *tss, struct thread_struct *thread)
 {
 	tss->esp0 = thread->esp0;
diff -Npru /home/jbeulich/tmp/linux-2.6.16-rc1/include/asm-i386/segment.h 2.6.16-rc1-i386-doublefault/include/asm-i386/segment.h
--- /home/jbeulich/tmp/linux-2.6.16-rc1/include/asm-i386/segment.h	2006-01-18 12:39:04.000000000 +0100
+++ 2.6.16-rc1-i386-doublefault/include/asm-i386/segment.h	2006-01-30 09:57:02.400909632 +0100
@@ -43,7 +43,8 @@
  *  28 - unused
  *  29 - unused
  *  30 - unused
- *  31 - TSS for double fault handler
+ *  31 - TSS for first exception handler (double fault)
+ *  32+  TSSes for further exception handlers
  */
 #define GDT_ENTRY_TLS_ENTRIES	3
 #define GDT_ENTRY_TLS_MIN	6
@@ -74,12 +75,22 @@
 #define GDT_ENTRY_ESPFIX_SS		(GDT_ENTRY_KERNEL_BASE + 14)
 #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
 
-#define GDT_ENTRY_DOUBLEFAULT_TSS	31
+#define GDT_ENTRY_EXCEPTION_TSS	31
+#ifdef CONFIG_DOUBLEFAULT
+# define DOUBLEFAULT_TSS 0
+# define N_EXCEPTION_TSS 1
+#else
+# undef GDT_ENTRY_EXCEPTION_TSS
+#endif
 
 /*
- * The GDT has 32 entries
+ * The GDT has 31+ entries
  */
-#define GDT_ENTRIES 32
+#ifdef N_EXCEPTION_TSS
+# define GDT_ENTRIES (31 + N_EXCEPTION_TSS)
+#else
+# define GDT_ENTRIES 31
+#endif
 
 #define GDT_SIZE (GDT_ENTRIES * 8)
 
diff -Npru /home/jbeulich/tmp/linux-2.6.16-rc1/include/asm-i386/thread_info.h 2.6.16-rc1-i386-doublefault/include/asm-i386/thread_info.h
--- /home/jbeulich/tmp/linux-2.6.16-rc1/include/asm-i386/thread_info.h	2006-01-18 12:39:04.000000000 +0100
+++ 2.6.16-rc1-i386-doublefault/include/asm-i386/thread_info.h	2006-01-25 10:41:49.000000000 +0100
@@ -54,10 +54,11 @@ struct thread_info {
 
 #define PREEMPT_ACTIVE		0x10000000
 #ifdef CONFIG_4KSTACKS
-#define THREAD_SIZE            (4096)
+#define THREAD_ORDER 0
 #else
-#define THREAD_SIZE		(8192)
+#define THREAD_ORDER 1
 #endif
+#define THREAD_SIZE (4096 << THREAD_ORDER)
 
 #define STACK_WARN             (THREAD_SIZE/8)
 /*

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] double fault enhancements
  2006-01-30  9:58 ` Arjan van de Ven
@ 2006-01-30 10:12   ` Jan Beulich
  0 siblings, 0 replies; 5+ messages in thread
From: Jan Beulich @ 2006-01-30 10:12 UTC (permalink / raw)
  To: Arjan Ven; +Cc: linux-kernel

>I would hope TSS and such remain in the kernel static space, because
>those are the kind of things I'd like to be read only over time...

I'm not sure what you're trying to say. A TSS can't possibly be read-only, as the processor will need to write to it
any time it gets used.

>Also the last chunk of your patch has nothing to do with what you
>describe here... and seems sort of spurious. (it might be a useful
>cleanup, but it should be independent)

It is relevant, for the preprocessor pseudo-assertion in cpu_init() to work. Anyway, I submitted the THREAD_ORDER
introduction as a separate cleanup-like (as you suggest) patch before, without getting any positive or negative
responses back...

Jan


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] double fault enhancements
       [not found] <43DDF050.76F0.0078.0@novell.com.suse.lists.linux.kernel>
@ 2006-01-31 12:35 ` Andi Kleen
  0 siblings, 0 replies; 5+ messages in thread
From: Andi Kleen @ 2006-01-31 12:35 UTC (permalink / raw)
  To: Jan Beulich; +Cc: linux-kernel

"Jan Beulich" <JBeulich@novell.com> writes:

> From: Jan Beulich <jbeulich@novell.com>
> 
> Make the double fault handler use CPU-specific stacks. Add some
> abstraction to simplify future change of other exception handlers to go
> through task gates. Change the pointer validity checks in the double
> fault handler to account for the fact that both GDT and TSS aren't in
> static kernel space anymore. Add a new notification of the event
> through the die notifier chain, also providing some environmental
> adjustments so that various infrastructural things work independent of
> the fact that the fault and the callbacks are running on other than the
> normal kernel stack.

Looks good to me. Feel free to include an Acked-by: ak@suse.de
in future versions.

-Andi


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2006-01-31 12:35 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-01-30  9:54 [PATCH] double fault enhancements Jan Beulich
2006-01-30  9:58 ` Arjan van de Ven
2006-01-30 10:12   ` Jan Beulich
2006-01-30 10:08 ` (correction) " Jan Beulich
     [not found] <43DDF050.76F0.0078.0@novell.com.suse.lists.linux.kernel>
2006-01-31 12:35 ` Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox