public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Manfred Spraul <manfred@colorfullife.com>
To: linux-kernel@vger.kernel.org
Subject: [PATCH] cache colour task_structs
Date: Wed, 31 Oct 2001 20:27:41 +0100	[thread overview]
Message-ID: <3BE050AD.C6D7CE4B@colorfullife.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 939 bytes --]

All task structs are 8 kB aligned, which results in very bad cache
behaviour when walking the task chains.

The attached patch moves the task structure into a slab, with normal
cache colouring.
It's tested on i386 SMP (i.e. it boots and runs X).

'current=%esp&0xffffe000' was replaced with reusing %cr2.

There are 2 changes that might have side effects:

1) arch/i386/kernel/entry.S:
<<<<<<
error_code:
[...]
-       GET_CURRENT(%ebx)
        call *%edi
        addl $8,%esp
+       GET_CURRENT(%ebx)
<<<<<
The pointer to current was loaded into %ebx before the call to the error
handler; now that only happens after the call. As far as I can see, the
load before the call is not required.

2) arch/i386/kernel/smpboot.c:
- stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
+ stack_start.esp = (void *) (THREAD_SIZE + (char *)TSK_TO_KSTACK(idle));

I don't understand why the top 3 kB of the stack are not used.

--
	Manfred

[-- Attachment #2: patch-cr2 --]
[-- Type: text/plain, Size: 12703 bytes --]

// $Header$
// Kernel Version:
//  VERSION = 2
//  PATCHLEVEL = 4
//  SUBLEVEL = 14
//  EXTRAVERSION =-pre3
diff -ur 2.4/include/asm-i386/current.h build-2.4/include/asm-i386/current.h
--- 2.4/include/asm-i386/current.h	Sat Aug 15 01:35:22 1998
+++ build-2.4/include/asm-i386/current.h	Wed Oct 31 17:19:41 2001
@@ -5,11 +5,27 @@
 
 static inline struct task_struct * get_current(void)
 {
-	struct task_struct *current;
-	__asm__("andl %%esp,%0; ":"=r" (current) : "0" (~8191UL));
-	return current;
- }
- 
+	struct task_struct *tsk;
+	__asm__("movl %%cr2,%0;": "=r" (tsk));
+	return tsk;
+}
+
+/* for within NMI, do_page_fault, cpu_init */
+static inline struct task_struct * hard_get_current(void)
+{
+	struct task_struct **ptsk;
+	__asm__("andl %%esp,%0; ":"=r" (ptsk) : "0" (~8191UL));
+	return *ptsk;
+}
+
+static inline void set_current(struct task_struct *tsk)
+{
+	__asm__("movl %0,%%cr2;"
+			: /* no output */
+			:"r" (tsk));
+}
+   
+/* Note: the implementation is hardcoded into arch/i386/lib/getuser.S */
 #define current get_current()
 
 #endif /* !(_I386_CURRENT_H) */
diff -ur 2.4/include/asm-i386/processor.h build-2.4/include/asm-i386/processor.h
--- 2.4/include/asm-i386/processor.h	Sun Oct 28 02:12:45 2001
+++ build-2.4/include/asm-i386/processor.h	Wed Oct 31 18:32:01 2001
@@ -14,6 +14,7 @@
 #include <asm/types.h>
 #include <asm/sigcontext.h>
 #include <asm/cpufeature.h>
+#include <asm/atomic.h>
 #include <linux/cache.h>
 #include <linux/config.h>
 #include <linux/threads.h>
@@ -383,6 +384,16 @@
 	unsigned long	io_bitmap[IO_BITMAP_SIZE+1];
 };
 
+struct task_struct_info
+{
+	void *kstack;
+	atomic_t users;
+};
+
+/* the init task stack is allocated externally */
+#define INIT_TASK_SIZE	(sizeof(struct task_struct) + sizeof(struct task_struct_info))
+extern unsigned long init_task_stack[];
+
 #define INIT_THREAD  {						\
 	0,							\
 	0, 0, 0, 0, 						\
@@ -395,7 +406,7 @@
 
 #define INIT_TSS  {						\
 	0,0, /* back_link, __blh */				\
-	sizeof(init_stack) + (long) &init_stack, /* esp0 */	\
+	0, /* esp0 */ 						\
 	__KERNEL_DS, 0, /* ss0 */				\
 	0,0,0,0,0,0, /* stack1, stack2 */			\
 	0, /* cr3 */						\
@@ -444,16 +455,19 @@
 }
 
 unsigned long get_wchan(struct task_struct *p);
-#define KSTK_EIP(tsk)	(((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
-#define KSTK_ESP(tsk)	(((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
+#define TSK_TO_KSTACK(tsk) \
+	((unsigned long *) ((struct task_struct_info*)(tsk+1))->kstack)
+
+#define KSTK_EIP(tsk)	(TSK_TO_KSTACK(tsk)[2043])
+#define KSTK_ESP(tsk)	(TSK_TO_KSTACK(tsk)[2046])
 
 #define THREAD_SIZE (2*PAGE_SIZE)
-#define alloc_task_struct() ((struct task_struct *) __get_free_pages(GFP_KERNEL,1))
-#define free_task_struct(p) free_pages((unsigned long) (p), 1)
-#define get_task_struct(tsk)      atomic_inc(&virt_to_page(tsk)->count)
+void init_tsk_allocator(void);
+struct task_struct * alloc_task_struct(void);
+void get_task_struct(struct task_struct *tsk);
+void free_task_struct(struct task_struct *tsk);
 
 #define init_task	(init_task_union.task)
-#define init_stack	(init_task_union.stack)
 
 struct microcode {
 	unsigned int hdrver;
diff -ur 2.4/arch/i386/kernel/entry.S build-2.4/arch/i386/kernel/entry.S
--- 2.4/arch/i386/kernel/entry.S	Sun Oct 28 02:12:43 2001
+++ build-2.4/arch/i386/kernel/entry.S	Wed Oct 31 17:34:54 2001
@@ -129,8 +129,7 @@
 .previous
 
 #define GET_CURRENT(reg) \
-	movl $-8192, reg; \
-	andl %esp, reg
+	movl %cr2, reg
 
 ENTRY(lcall7)
 	pushfl			# We get a different stack layout with call gates,
@@ -144,7 +143,7 @@
 	movl %ecx,CS(%esp)	#
 	movl %esp,%ebx
 	pushl %ebx
-	andl $-8192,%ebx	# GET_CURRENT
+	GET_CURRENT(%ebx)
 	movl exec_domain(%ebx),%edx	# Get the execution domain
 	movl 4(%edx),%edx	# Get the lcall7 handler for the domain
 	pushl $0x7
@@ -165,7 +164,7 @@
 	movl %ecx,CS(%esp)	#
 	movl %esp,%ebx
 	pushl %ebx
-	andl $-8192,%ebx	# GET_CURRENT
+	GET_CURRENT(%ebx)
 	movl exec_domain(%ebx),%edx	# Get the execution domain
 	movl 4(%edx),%edx	# Get the lcall7 handler for the domain
 	pushl $0x27
@@ -286,9 +285,9 @@
 	movl $(__KERNEL_DS),%edx
 	movl %edx,%ds
 	movl %edx,%es
-	GET_CURRENT(%ebx)
 	call *%edi
 	addl $8,%esp
+	GET_CURRENT(%ebx)
 	jmp ret_from_exception
 
 ENTRY(coprocessor_error)
diff -ur 2.4/arch/i386/kernel/head.S build-2.4/arch/i386/kernel/head.S
--- 2.4/arch/i386/kernel/head.S	Sun Oct 28 02:12:43 2001
+++ build-2.4/arch/i386/kernel/head.S	Tue Oct 30 22:02:32 2001
@@ -261,13 +261,13 @@
 #ifdef CONFIG_SMP
 	movb ready, %cl	
 	cmpb $1,%cl
-	je 1f			# the first CPU calls start_kernel
+	je 1f			# the first CPU calls initialize_primary
 				# all other CPUs call initialize_secondary
 	call SYMBOL_NAME(initialize_secondary)
 	jmp L6
 1:
 #endif
-	call SYMBOL_NAME(start_kernel)
+	call SYMBOL_NAME(initialize_primary)
 L6:
 	jmp L6			# main should never return here, but
 				# just in case, we know what happens.
@@ -320,7 +320,7 @@
 	ret
 
 ENTRY(stack_start)
-	.long SYMBOL_NAME(init_task_union)+8192
+	.long SYMBOL_NAME(init_task_stack)+8192
 	.long __KERNEL_DS
 
 /* This is the default interrupt "handler" :-) */
diff -ur 2.4/arch/i386/kernel/init_task.c build-2.4/arch/i386/kernel/init_task.c
--- 2.4/arch/i386/kernel/init_task.c	Sun Oct 28 02:12:43 2001
+++ build-2.4/arch/i386/kernel/init_task.c	Wed Oct 31 00:14:02 2001
@@ -13,14 +13,18 @@
 
 /*
  * Initial task structure.
- *
+ */
+union task_union init_task_union =
+		{ INIT_TASK(init_task_union.task) };
+/*
  * We need to make sure that this is 8192-byte aligned due to the
  * way process stacks are handled. This is done by having a special
  * "init_task" linker map entry..
  */
-union task_union init_task_union 
+
+unsigned long init_task_stack[THREAD_SIZE/sizeof(unsigned long)]
 	__attribute__((__section__(".data.init_task"))) =
-		{ INIT_TASK(init_task_union.task) };
+	{ (unsigned long)&init_task_union,};
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
diff -ur 2.4/arch/i386/kernel/irq.c build-2.4/arch/i386/kernel/irq.c
--- 2.4/arch/i386/kernel/irq.c	Sun Oct 28 10:07:01 2001
+++ build-2.4/arch/i386/kernel/irq.c	Wed Oct 31 00:25:06 2001
@@ -223,7 +223,6 @@
 			continue;
 		}
 		esp &= ~(THREAD_SIZE-1);
-		esp += sizeof(struct task_struct);
 		show_stack((void*)esp);
  	}
 	printk("\nCPU %d:",cpu);
diff -ur 2.4/arch/i386/kernel/nmi.c build-2.4/arch/i386/kernel/nmi.c
--- 2.4/arch/i386/kernel/nmi.c	Sun Sep 30 16:25:06 2001
+++ build-2.4/arch/i386/kernel/nmi.c	Wed Oct 31 18:08:06 2001
@@ -261,12 +261,10 @@
 
 void nmi_watchdog_tick (struct pt_regs * regs)
 {
-
 	/*
-	 * Since current-> is always on the stack, and we always switch
-	 * the stack NMI-atomically, it's safe to use smp_processor_id().
+	 * NMI can interrupt page faults, use hard_get_current.
 	 */
-	int sum, cpu = smp_processor_id();
+	int sum, cpu = hard_get_current()->processor;
 
 	sum = apic_timer_irqs[cpu];
 
@@ -282,6 +280,7 @@
 			 * We are in trouble anyway, lets at least try
 			 * to get a message out.
 			 */
+			set_current(hard_get_current());
 			bust_spinlocks(1);
 			printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu);
 			show_registers(regs);
diff -ur 2.4/arch/i386/kernel/process.c build-2.4/arch/i386/kernel/process.c
--- 2.4/arch/i386/kernel/process.c	Thu Oct 11 15:19:41 2001
+++ build-2.4/arch/i386/kernel/process.c	Wed Oct 31 17:20:49 2001
@@ -569,6 +569,64 @@
 	new_mm->context.cpuvalid = ~0UL;	/* valid on all CPU's - they can't have stale data */
 }
 
+struct full_task_struct
+{
+	struct task_struct tsk;
+	struct task_struct_info info;
+};
+
+static kmem_cache_t * tsk_cache;
+
+struct task_struct * alloc_task_struct(void)
+{
+	struct full_task_struct *f = kmem_cache_alloc(tsk_cache, GFP_KERNEL);
+	if (!f)
+		return NULL;
+	f->info.kstack = (void*)__get_free_pages(GFP_KERNEL,1);
+	if (!f->info.kstack) {
+		kmem_cache_free(tsk_cache, f);
+		return NULL;
+	}
+	*(void**)f->info.kstack = &f->tsk;
+	atomic_set(&f->info.users, 1);	
+	return &f->tsk;
+}
+
+void get_task_struct(struct task_struct *tsk)
+{
+	struct full_task_struct *f = (struct full_task_struct*)tsk;
+	atomic_inc(&f->info.users);
+}
+
+void free_task_struct(struct task_struct *tsk)
+{
+	struct full_task_struct *f = (struct full_task_struct*)tsk;
+	if(atomic_dec_and_test(&f->info.users)) {
+		free_pages((unsigned long) f->info.kstack, 1);
+		kmem_cache_free(tsk_cache, f);
+	}
+}
+
+void __init init_tsk_allocator(void)
+{
+	tsk_cache = kmem_cache_create("task_cache",
+					 sizeof(struct full_task_struct),
+					 0,
+					 SLAB_HWCACHE_ALIGN,
+					 NULL, NULL);
+	if (!tsk_cache)
+		panic("Cannot create task struct cache");
+}
+
+extern asmlinkage void start_kernel(void);
+void __init initialize_primary(void)
+{
+	struct full_task_struct *f = (struct full_task_struct*)hard_get_current();
+	atomic_set(&f->info.users, 1);
+	f->info.kstack = init_task_stack;
+	set_current(&f->tsk);
+	start_kernel();
+}
 /*
  * Save a segment.
  */
@@ -580,8 +638,9 @@
 	struct task_struct * p, struct pt_regs * regs)
 {
 	struct pt_regs * childregs;
+	struct full_task_struct *f = (struct full_task_struct *)p;
 
-	childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p)) - 1;
+	childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) f->info.kstack)) - 1;
 	struct_cpy(childregs, regs);
 	childregs->eax = 0;
 	childregs->esp = esp;
@@ -685,6 +744,7 @@
 	 * Reload esp0, LDT and the page table pointer:
 	 */
 	tss->esp0 = next->esp0;
+	set_current(next_p);
 
 	/*
 	 * Save away %fs and %gs. No need to save %es and %ds, as
diff -ur 2.4/arch/i386/kernel/smpboot.c build-2.4/arch/i386/kernel/smpboot.c
--- 2.4/arch/i386/kernel/smpboot.c	Thu Oct 11 15:19:41 2001
+++ build-2.4/arch/i386/kernel/smpboot.c	Tue Oct 30 23:41:05 2001
@@ -482,6 +482,7 @@
 	 * We don't actually need to load the full TSS,
 	 * basically just the stack pointer and the eip.
 	 */
+	set_current(hard_get_current());
 
 	asm volatile(
 		"movl %0,%%esp\n\t"
@@ -815,7 +816,7 @@
 
 	/* So we see what's up   */
 	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
-	stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle);
+	stack_start.esp = (void *) (THREAD_SIZE + (char *)TSK_TO_KSTACK(idle));
 
 	/*
 	 * This grunge runs the startup process for
diff -ur 2.4/arch/i386/kernel/traps.c build-2.4/arch/i386/kernel/traps.c
--- 2.4/arch/i386/kernel/traps.c	Sun Oct 28 02:12:43 2001
+++ build-2.4/arch/i386/kernel/traps.c	Wed Oct 31 18:23:20 2001
@@ -209,7 +209,7 @@
 	printk("ds: %04x   es: %04x   ss: %04x\n",
 		regs->xds & 0xffff, regs->xes & 0xffff, ss);
 	printk("Process %s (pid: %d, stackpage=%08lx)",
-		current->comm, current->pid, 4096+(unsigned long)current);
+		current->comm, current->pid, TSK_TO_KSTACK(current));
 	/*
 	 * When in-kernel, we also print out the stack and code at the
 	 * time of the fault..
@@ -422,7 +422,7 @@
 {
 	unsigned char reason = inb(0x61);
 
-	++nmi_count(smp_processor_id());
+	++nmi_count(hard_get_current()->processor);
 
 	if (!(reason & 0xc0)) {
 #if CONFIG_X86_LOCAL_APIC
diff -ur 2.4/arch/i386/mm/fault.c build-2.4/arch/i386/mm/fault.c
--- 2.4/arch/i386/mm/fault.c	Sun Oct 28 02:12:43 2001
+++ build-2.4/arch/i386/mm/fault.c	Wed Oct 31 17:37:18 2001
@@ -159,12 +159,13 @@
 
 	/* get the address */
 	__asm__("movl %%cr2,%0":"=r" (address));
+	/* and restore current */
+	tsk = hard_get_current();
+	set_current(tsk);
 
 	/* It's safe to allow irq's after cr2 has been saved */
 	if (regs->eflags & X86_EFLAGS_IF)
 		local_irq_enable();
-
-	tsk = current;
 
 	/*
 	 * We fault-in kernel-space virtual memory on-demand. The
diff -ur 2.4/arch/i386/lib/getuser.S build-2.4/arch/i386/lib/getuser.S
--- 2.4/arch/i386/lib/getuser.S	Mon Jan 12 22:42:52 1998
+++ build-2.4/arch/i386/lib/getuser.S	Wed Oct 31 17:18:28 2001
@@ -27,8 +27,7 @@
 .align 4
 .globl __get_user_1
 __get_user_1:
-	movl %esp,%edx
-	andl $0xffffe000,%edx
+	movl %cr2,%edx
 	cmpl addr_limit(%edx),%eax
 	jae bad_get_user
 1:	movzbl (%eax),%edx
@@ -39,9 +38,8 @@
 .globl __get_user_2
 __get_user_2:
 	addl $1,%eax
-	movl %esp,%edx
 	jc bad_get_user
-	andl $0xffffe000,%edx
+	movl %cr2,%edx
 	cmpl addr_limit(%edx),%eax
 	jae bad_get_user
 2:	movzwl -1(%eax),%edx
@@ -52,9 +50,8 @@
 .globl __get_user_4
 __get_user_4:
 	addl $3,%eax
-	movl %esp,%edx
 	jc bad_get_user
-	andl $0xffffe000,%edx
+	movl %cr2,%edx
 	cmpl addr_limit(%edx),%eax
 	jae bad_get_user
 3:	movl -3(%eax),%edx
diff -u 2.4/init/main.c build-2.4/init/main.c
--- 2.4/init/main.c	Sun Oct 28 02:12:45 2001
+++ build-2.4/init/main.c	Wed Oct 31 17:16:17 2001
@@ -594,6 +594,9 @@
 	mempages = num_physpages;
 
 	fork_init(mempages);
+#ifdef __i386__
+	init_tsk_allocator();
+#endif
 	proc_caches_init();
 	vfs_caches_init(mempages);
 	buffer_init(mempages);

             reply	other threads:[~2001-10-31 19:27 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2001-10-31 19:27 Manfred Spraul [this message]
2001-11-01  1:04 ` [PATCH] cache colour task_structs Alan Cox
2001-11-01 10:49   ` Manfred Spraul
     [not found] <3BE050AD.C6D7CE4B@colorfullife.com.suse.lists.linux.kernel>
     [not found] ` <E15z6HM-0005gW-00@the-village.bc.nu.suse.lists.linux.kernel>
2001-11-01  1:43   ` Andi Kleen
2001-11-01  2:18     ` Davide Libenzi
2001-11-01  4:42     ` Keith Owens

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3BE050AD.C6D7CE4B@colorfullife.com \
    --to=manfred@colorfullife.com \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox