All of lore.kernel.org
 help / color / mirror / Atom feed
From: Benjamin LaHaise <bcrl@redhat.com>
To: Manfred Spraul <manfred@colorfullife.com>
Cc: Anton Blanchard <anton@samba.org>,
	"David S. Miller" <davem@redhat.com>,
	jakub@redhat.com, torvalds@transmeta.com,
	alan@lxorguk.ukuu.org.uk, arjanv@redhat.com,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH] take 2 of the tr-based current
Date: Sun, 11 Nov 2001 22:32:49 -0500	[thread overview]
Message-ID: <20011111223249.B31746@redhat.com> (raw)
In-Reply-To: <20011108211143.A4797@redhat.com> <20011109041327.T4087@devserv.devel.redhat.com> <3BEBEE0B.BA1FD7EE@colorfullife.com> <20011109.070312.88700201.davem@redhat.com> <3BEBF730.86CAE1CC@colorfullife.com> <20011111110107.A4064@krispykreme> <3BEE4C04.4040406@colorfullife.com> <20011111233611.A7409@krispykreme> <3BEE84F4.8090103@colorfullife.com>
In-Reply-To: <3BEE84F4.8090103@colorfullife.com>; from manfred@colorfullife.com on Sun, Nov 11, 2001 at 03:02:28PM +0100

On Sun, Nov 11, 2001 at 03:02:28PM +0100, Manfred Spraul wrote:
> Ben, is it intentional that get_TR is _not_ marked as inline? Your
> version produces explicit function calls with -O2, and incorrect code
> with -O99 (gcc decides to inline get_TR even without an inline
> directive, and then optimizes away the calls after schedule.)

No, I'd taken the inline keyword out during testing.  It's now back in.

> It seems that Anton's version generates the best code.
> I've tested the attached version with egcs-1.1.2, gcc-2.96-98 and
> gcc3-3.0.1-3, with -O0, -O2 and -O99.

Done and tested:

#APP
        str %bx
#NO_APP
        call    schedule
        subl    $12, %esp
#APP
        str %ax
#NO_APP

with no duplication of the inline asm for cases where it is valid to 
optimize.  Updated patch is below.

		-ben
-- 
Fish.

... v2.4.13-ac8+tr.5.diff ...
diff -urN kernels/2.4/v2.4.13-ac8/arch/i386/kernel/entry.S v2.4.13-ac8+tr.5/arch/i386/kernel/entry.S
--- kernels/2.4/v2.4.13-ac8/arch/i386/kernel/entry.S	Tue Nov  6 20:43:22 2001
+++ v2.4.13-ac8+tr.5/arch/i386/kernel/entry.S	Thu Nov  8 22:27:30 2001
@@ -45,6 +45,7 @@
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/smp.h>
+#include <asm/current_asm.h>
 
 EBX		= 0x00
 ECX		= 0x04
@@ -134,9 +135,6 @@
 	.long 3b,6b;	\
 .previous
 
-#define GET_CURRENT(reg) \
-	movl %cr2, reg
-
 ENTRY(lcall7)
 	pushfl			# We get a different stack layout with call gates,
 	pushl %eax		# which has to be cleaned up later..
@@ -149,7 +147,7 @@
 	movl %ecx,CS(%esp)	#
 	movl %esp,%ebx
 	pushl %ebx
-	GET_CURRENT(%ebx)
+	GET_CURRENT(%ebx,%bx)
 	movl exec_domain(%ebx),%edx	# Get the execution domain
 	movl 4(%edx),%edx	# Get the lcall7 handler for the domain
 	pushl $0x7
@@ -170,7 +168,7 @@
 	movl %ecx,CS(%esp)	#
 	movl %esp,%ebx
 	pushl %ebx
-	GET_CURRENT(%ebx)
+	GET_CURRENT(%ebx,%bx)
 	movl exec_domain(%ebx),%edx	# Get the execution domain
 	movl 4(%edx),%edx	# Get the lcall7 handler for the domain
 	pushl $0x27
@@ -184,7 +182,7 @@
 	pushl %ebx
 	call SYMBOL_NAME(schedule_tail)
 	addl $4, %esp
-	GET_CURRENT(%ebx)
+	GET_CURRENT(%ebx,%bx)
 	testb $0x02,tsk_ptrace(%ebx)	# PT_TRACESYS
 	jne tracesys_exit
 	jmp	ret_from_sys_call
@@ -199,7 +197,7 @@
 ENTRY(system_call)
 	pushl %eax			# save orig_eax
 	SAVE_ALL
-	GET_CURRENT(%ebx)
+	GET_CURRENT(%ebx,%bx)
 	cmpl $(NR_syscalls),%eax
 	jae badsys
 	testb $0x02,tsk_ptrace(%ebx)	# PT_TRACESYS
@@ -251,7 +249,7 @@
 
 	ALIGN
 ENTRY(ret_from_intr)
-	GET_CURRENT(%ebx)
+	GET_CURRENT(%ebx,%bx)
 ret_from_exception:
 	movl EFLAGS(%esp),%eax		# mix EFLAGS and CS
 	movb CS(%esp),%al
@@ -297,7 +295,7 @@
 	movl %edx,%ds
 2:	call *%edi
 	addl $8,%esp
-	GET_CURRENT(%ebx)
+	GET_CURRENT(%ebx,%bx)
 	jmp ret_from_exception
 
 ENTRY(coprocessor_error)
@@ -313,7 +311,7 @@
 ENTRY(device_not_available)
 	pushl $-1		# mark this as an int
 	SAVE_ALL
-	GET_CURRENT(%ebx)
+	GET_CURRENT(%ebx,%bx)
 	movl %cr0,%eax
 	testl $0x4,%eax			# EM (math emulation bit)
 	jne device_not_available_emulate
diff -urN kernels/2.4/v2.4.13-ac8/arch/i386/kernel/ldt.c v2.4.13-ac8+tr.5/arch/i386/kernel/ldt.c
--- kernels/2.4/v2.4.13-ac8/arch/i386/kernel/ldt.c	Thu Nov  1 16:39:57 2001
+++ v2.4.13-ac8+tr.5/arch/i386/kernel/ldt.c	Thu Nov  8 18:25:56 2001
@@ -12,11 +12,13 @@
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
+#include <linux/per_cpu.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/ldt.h>
 #include <asm/desc.h>
+#include <asm/descfn.h>
 
 /*
  * read_ldt() is not really atomic - this is not a problem since
diff -urN kernels/2.4/v2.4.13-ac8/arch/i386/kernel/nmi.c v2.4.13-ac8+tr.5/arch/i386/kernel/nmi.c
--- kernels/2.4/v2.4.13-ac8/arch/i386/kernel/nmi.c	Tue Nov  6 20:43:22 2001
+++ v2.4.13-ac8+tr.5/arch/i386/kernel/nmi.c	Sat Nov 10 14:00:33 2001
@@ -264,7 +264,7 @@
 	/*
 	 * NMI can interrupt page faults, use hard_get_current.
 	 */
-	int sum, cpu = hard_get_current()->processor;
+	int sum, cpu = hard_smp_processor_id();
 
 	sum = apic_timer_irqs[cpu];
 
diff -urN kernels/2.4/v2.4.13-ac8/arch/i386/kernel/setup.c v2.4.13-ac8+tr.5/arch/i386/kernel/setup.c
--- kernels/2.4/v2.4.13-ac8/arch/i386/kernel/setup.c	Tue Nov  6 20:43:22 2001
+++ v2.4.13-ac8+tr.5/arch/i386/kernel/setup.c	Sat Nov 10 16:56:23 2001
@@ -108,6 +108,7 @@
 #include <asm/cobalt.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
+#include <asm/descfn.h>
 #include <asm/e820.h>
 #include <asm/dma.h>
 #include <asm/mpspec.h>
@@ -2852,7 +2853,8 @@
  */
 void __init cpu_init (void)
 {
-	int nr = smp_processor_id();
+	struct task_struct *cur = hard_get_current();
+	int nr = cur->processor;
 	struct tss_struct * t = &init_tss[nr];
 
 	if (test_and_set_bit(nr, &cpu_initialized)) {
@@ -2884,17 +2886,19 @@
 	 * set up and load the per-CPU TSS and LDT
 	 */
 	atomic_inc(&init_mm.mm_count);
-	current->active_mm = &init_mm;
-	if(current->mm)
+	cur->active_mm = &init_mm;
+	if(cur->mm)
 		BUG();
-	enter_lazy_tlb(&init_mm, current, nr);
+	enter_lazy_tlb(&init_mm, cur, nr);
 
-	t->esp0 = current->thread.esp0;
+	t->esp0 = cur->thread.esp0;
 	set_tss_desc(nr,t);
 	gdt_table[__TSS(nr)].b &= 0xfffffdff;
 	load_TR(nr);
 	load_LDT(&init_mm);
 
+	set_current(cur);
+
 	/*
 	 * Clear all 6 debug registers:
 	 */
@@ -2908,8 +2912,8 @@
 	/*
 	 * Force FPU initialization:
 	 */
-	current->flags &= ~PF_USEDFPU;
-	current->used_math = 0;
+	cur->flags &= ~PF_USEDFPU;
+	cur->used_math = 0;
 	stts();
 }
 
diff -urN kernels/2.4/v2.4.13-ac8/arch/i386/kernel/smpboot.c v2.4.13-ac8+tr.5/arch/i386/kernel/smpboot.c
--- kernels/2.4/v2.4.13-ac8/arch/i386/kernel/smpboot.c	Tue Nov  6 20:43:22 2001
+++ v2.4.13-ac8+tr.5/arch/i386/kernel/smpboot.c	Sat Nov 10 15:56:44 2001
@@ -507,14 +507,14 @@
 }
 
 /* which physical APIC ID maps to which logical CPU number */
-volatile int physical_apicid_2_cpu[MAX_APICID];
+volatile int physical_apicid_to_cpu[MAX_APICID];
 /* which logical CPU number maps to which physical APIC ID */
-volatile int cpu_2_physical_apicid[NR_CPUS];
+volatile int cpu_to_physical_apicid[NR_CPUS];
 
 /* which logical APIC ID maps to which logical CPU number */
-volatile int logical_apicid_2_cpu[MAX_APICID];
+volatile int logical_apicid_to_cpu[MAX_APICID];
 /* which logical CPU number maps to which logical APIC ID */
-volatile int cpu_2_logical_apicid[NR_CPUS];
+volatile int cpu_to_logical_apicid[NR_CPUS];
 
 static inline void init_cpu_to_apicid(void)
 /* Initialize all maps between cpu number and apicids */
@@ -522,12 +522,12 @@
 	int apicid, cpu;
 
 	for (apicid = 0; apicid < MAX_APICID; apicid++) {
-		physical_apicid_2_cpu[apicid] = -1;
-		logical_apicid_2_cpu[apicid] = -1;
+		physical_apicid_to_cpu[apicid] = -1;
+		logical_apicid_to_cpu[apicid] = -1;
 	}
 	for (cpu = 0; cpu < NR_CPUS; cpu++) {
-		cpu_2_physical_apicid[cpu] = -1;
-		cpu_2_logical_apicid[cpu] = -1;
+		cpu_to_physical_apicid[cpu] = -1;
+		cpu_to_logical_apicid[cpu] = -1;
 	}
 }
 
@@ -538,11 +538,11 @@
  */
 {
 	if (clustered_apic_mode) {
-		logical_apicid_2_cpu[apicid] = cpu;	
-		cpu_2_logical_apicid[cpu] = apicid;
+		logical_apicid_to_cpu[apicid] = cpu;	
+		cpu_to_logical_apicid[cpu] = apicid;
 	} else {
-		physical_apicid_2_cpu[apicid] = cpu;	
-		cpu_2_physical_apicid[cpu] = apicid;
+		physical_apicid_to_cpu[apicid] = cpu;	
+		cpu_to_physical_apicid[cpu] = apicid;
 	}
 }
 
@@ -553,11 +553,11 @@
  */
 {
 	if (clustered_apic_mode) {
-		logical_apicid_2_cpu[apicid] = -1;	
-		cpu_2_logical_apicid[cpu] = -1;
+		logical_apicid_to_cpu[apicid] = -1;	
+		cpu_to_logical_apicid[cpu] = -1;
 	} else {
-		physical_apicid_2_cpu[apicid] = -1;	
-		cpu_2_physical_apicid[cpu] = -1;
+		physical_apicid_to_cpu[apicid] = -1;	
+		cpu_to_physical_apicid[cpu] = -1;
 	}
 }
 
diff -urN kernels/2.4/v2.4.13-ac8/arch/i386/lib/getuser.S v2.4.13-ac8+tr.5/arch/i386/lib/getuser.S
--- kernels/2.4/v2.4.13-ac8/arch/i386/lib/getuser.S	Tue Nov  6 20:43:22 2001
+++ v2.4.13-ac8+tr.5/arch/i386/lib/getuser.S	Wed Nov  7 22:33:07 2001
@@ -8,6 +8,7 @@
  * return an error value in addition to the "real"
  * return value.
  */
+#include <asm/current_asm.h>
 
 /*
  * __get_user_X
@@ -27,7 +28,6 @@
 .align 4
 .globl __get_user_1
 __get_user_1:
-	movl %cr2,%edx
 	cmpl addr_limit(%edx),%eax
 	jae bad_get_user
 1:	movzbl (%eax),%edx
@@ -39,7 +39,6 @@
 __get_user_2:
 	addl $1,%eax
 	jc bad_get_user
-	movl %cr2,%edx
 	cmpl addr_limit(%edx),%eax
 	jae bad_get_user
 2:	movzwl -1(%eax),%edx
@@ -51,7 +50,6 @@
 __get_user_4:
 	addl $3,%eax
 	jc bad_get_user
-	movl %cr2,%edx
 	cmpl addr_limit(%edx),%eax
 	jae bad_get_user
 3:	movl -3(%eax),%edx
diff -urN kernels/2.4/v2.4.13-ac8/arch/i386/mm/fault.c v2.4.13-ac8+tr.5/arch/i386/mm/fault.c
--- kernels/2.4/v2.4.13-ac8/arch/i386/mm/fault.c	Tue Nov  6 20:43:22 2001
+++ v2.4.13-ac8+tr.5/arch/i386/mm/fault.c	Sat Nov 10 14:03:13 2001
@@ -25,6 +25,7 @@
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/hardirq.h>
+#include <asm/desc.h>
 
 extern void die(const char *,struct pt_regs *,long);
 
@@ -148,7 +149,6 @@
 }
 
 asmlinkage void do_invalid_op(struct pt_regs *, unsigned long);
-extern unsigned long idt;
 
 /*
  * This routine handles page faults.  It determines the address,
@@ -173,9 +173,11 @@
 
 	/* get the address */
 	__asm__("movl %%cr2,%0":"=r" (address));
+
 	/* and restore current */
-	tsk = hard_get_current();
-	set_current(tsk);
+	set_current(hard_get_current());
+	tsk = current;
+
 
 	/* It's safe to allow irq's after cr2 has been saved */
 	if (regs->eflags & X86_EFLAGS_IF)
@@ -309,7 +311,7 @@
 	if (boot_cpu_data.f00f_bug) {
 		unsigned long nr;
 		
-		nr = (address - idt) >> 3;
+		nr = (address - (unsigned long)idt) >> 3;
 
 		if (nr == 6) {
 			do_invalid_op(regs, 0);
diff -urN kernels/2.4/v2.4.13-ac8/include/asm-i386/current.h v2.4.13-ac8+tr.5/include/asm-i386/current.h
--- kernels/2.4/v2.4.13-ac8/include/asm-i386/current.h	Tue Nov  6 20:43:27 2001
+++ v2.4.13-ac8+tr.5/include/asm-i386/current.h	Sun Nov 11 19:28:29 2001
@@ -1,28 +1,25 @@
 #ifndef _I386_CURRENT_H
 #define _I386_CURRENT_H
 
-struct task_struct;
+#include <linux/per_cpu.h>
 
-static inline struct task_struct * get_current(void)
+static inline struct task_struct *get_current(void) __attribute__((const));
+static inline struct task_struct *get_current(void)
 {
-	struct task_struct *tsk;
-	__asm__("movl %%cr2,%0;": "=r" (tsk));
-	return tsk;
+	return per_data(smp_processor_id())->curr;
 }
 
 /* for within NMI, do_page_fault, cpu_init */
 static inline struct task_struct * hard_get_current(void)
 {
 	struct task_struct **ptsk;
-	__asm__("andl %%esp,%0; ":"=r" (ptsk) : "0" (~8191UL));
+	__asm__ __volatile__("andl %%esp,%0; ":"=r" (ptsk) : "0" (~8191UL));
 	return *ptsk;
 }
 
 static inline void set_current(struct task_struct *tsk)
 {
-	__asm__("movl %0,%%cr2;"
-			: /* no output */
-			:"r" (tsk));
+	per_data(smp_processor_id())->curr = tsk;
 }
    
 /* Note: the implementation is hardcoded into arch/i386/lib/getuser.S */
diff -urN kernels/2.4/v2.4.13-ac8/include/asm-i386/current_asm.h v2.4.13-ac8+tr.5/include/asm-i386/current_asm.h
--- kernels/2.4/v2.4.13-ac8/include/asm-i386/current_asm.h	Wed Dec 31 19:00:00 1969
+++ v2.4.13-ac8+tr.5/include/asm-i386/current_asm.h	Sat Nov 10 12:48:47 2001
@@ -0,0 +1,30 @@
+/* asm/current_asm.h
+ */
+#ifndef __ASM__CURRENT_ASM_H
+#define __ASM__CURRENT_ASM_H
+
+#include <linux/config.h>
+#include <linux/per_cpu.h>
+#include <asm/desc.h>
+
+#if 1 /*def CONFIG_SMP*/
+/* Pass in the long and short versions of the register.
+ * eg GET_CURRENT(%ebx,%bx)
+ * All of this braindamage comes to us c/o a bug in gas: the
+ * opcode we want should actually be generated by strl, but 
+ * unfortunately gas doesn't realize that the operand size 
+ * prefix applies to str.  Please take a wet noodle and thread 
+ * it into my eye as that will be less painful than dealing 
+ * with this mess.  -ben
+ */
+#define GET_CURRENT(reg,regw)				\
+	str regw					\
+	; shll $LOG2_PER_CPU_SIZE-2,reg			\
+	; aligned_data_adjusted = aligned_data-(__FIRST_TSS_ENTRY << (3 + LOG2_PER_CPU_SIZE - 2))	\
+	; movl aligned_data_adjusted(reg),reg
+
+#else
+#define GET_CURRENT(reg,regw)	movl (aligned_data),reg
+#endif
+
+#endif /* __ASM__CURRENT_ASM_H */
diff -urN kernels/2.4/v2.4.13-ac8/include/asm-i386/desc.h v2.4.13-ac8+tr.5/include/asm-i386/desc.h
--- kernels/2.4/v2.4.13-ac8/include/asm-i386/desc.h	Tue Nov  6 20:43:27 2001
+++ v2.4.13-ac8+tr.5/include/asm-i386/desc.h	Tue Nov  6 21:21:32 2001
@@ -68,40 +68,6 @@
 
 #define __load_LDT(n) __asm__ __volatile__("lldt %%ax"::"a" (__LDT(n)<<3))
 
-/*
- * This is the ldt that every process will get unless we need
- * something other than this.
- */
-extern struct desc_struct default_ldt[];
-extern void set_intr_gate(unsigned int irq, void * addr);
-extern void set_ldt_desc(unsigned int n, void *addr, unsigned int size);
-extern void set_tss_desc(unsigned int n, void *addr);
-
-static inline void clear_LDT(void)
-{
-	int cpu = smp_processor_id();
-	set_ldt_desc(cpu, &default_ldt[0], 5);
-	__load_LDT(cpu);
-}
-
-/*
- * load one particular LDT into the current CPU
- */
-static inline void load_LDT (struct mm_struct *mm)
-{
-	int cpu = smp_processor_id();
-	void *segments = mm->context.segments;
-	int count = LDT_ENTRIES;
-
-	if (!segments) {
-		segments = &default_ldt[0];
-		count = 5;
-	}
-		
-	set_ldt_desc(cpu, segments, count);
-	__load_LDT(cpu);
-}
-
 #endif /* !__ASSEMBLY__ */
 
 #endif
diff -urN kernels/2.4/v2.4.13-ac8/include/asm-i386/descfn.h v2.4.13-ac8+tr.5/include/asm-i386/descfn.h
--- kernels/2.4/v2.4.13-ac8/include/asm-i386/descfn.h	Wed Dec 31 19:00:00 1969
+++ v2.4.13-ac8+tr.5/include/asm-i386/descfn.h	Tue Nov  6 21:23:59 2001
@@ -0,0 +1,42 @@
+#ifndef __ARCH_DESCFN_H
+#define __ARCH_DESCFN_H
+
+#ifndef __ARCH_DESC_H
+#include <asm/desc.h>
+#endif
+
+/*
+ * This is the ldt that every process will get unless we need
+ * something other than this.
+ */
+extern struct desc_struct default_ldt[];
+extern void set_intr_gate(unsigned int irq, void * addr);
+extern void set_ldt_desc(unsigned int n, void *addr, unsigned int size);
+extern void set_tss_desc(unsigned int n, void *addr);
+
+static inline void clear_LDT(void)
+{
+	int cpu = smp_processor_id();
+	set_ldt_desc(cpu, &default_ldt[0], 5);
+	__load_LDT(cpu);
+}
+
+/*
+ * load one particular LDT into the current CPU
+ */
+static inline void load_LDT (struct mm_struct *mm)
+{
+	int cpu = smp_processor_id();
+	void *segments = mm->context.segments;
+	int count = LDT_ENTRIES;
+
+	if (!segments) {
+		segments = &default_ldt[0];
+		count = 5;
+	}
+		
+	set_ldt_desc(cpu, segments, count);
+	__load_LDT(cpu);
+}
+
+#endif /* __ARCH_DESCFN_H */
diff -urN kernels/2.4/v2.4.13-ac8/include/asm-i386/mmu_context.h v2.4.13-ac8+tr.5/include/asm-i386/mmu_context.h
--- kernels/2.4/v2.4.13-ac8/include/asm-i386/mmu_context.h	Tue Nov  6 21:23:24 2001
+++ v2.4.13-ac8+tr.5/include/asm-i386/mmu_context.h	Sun Nov 11 19:37:12 2001
@@ -5,6 +5,7 @@
 #include <asm/desc.h>
 #include <asm/atomic.h>
 #include <asm/pgalloc.h>
+#include <asm/descfn.h>
 
 /*
  * possibly do the LDT unload here?
diff -urN kernels/2.4/v2.4.13-ac8/include/asm-i386/smp.h v2.4.13-ac8+tr.5/include/asm-i386/smp.h
--- kernels/2.4/v2.4.13-ac8/include/asm-i386/smp.h	Tue Nov  6 21:00:35 2001
+++ v2.4.13-ac8+tr.5/include/asm-i386/smp.h	Sun Nov 11 19:37:06 2001
@@ -8,6 +8,7 @@
 #include <linux/config.h>
 #include <linux/threads.h>
 #include <linux/ptrace.h>
+#include <asm/desc.h>
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -83,10 +84,10 @@
  * the real APIC ID <-> CPU # mapping.
  */
 #define MAX_APICID 256
-extern volatile int cpu_to_physical_apicid[NR_CPUS];
-extern volatile int physical_apicid_to_cpu[MAX_APICID];
-extern volatile int cpu_to_logical_apicid[NR_CPUS];
-extern volatile int logical_apicid_to_cpu[MAX_APICID];
+extern volatile int physical_apicid_to_cpu[];
+extern volatile int cpu_to_physical_apicid[];
+extern volatile int cpu_to_logical_apicid[];
+extern volatile int logical_apicid_to_cpu[];
 
 /*
  * General functions that each host system must provide.
@@ -101,7 +102,24 @@
  * so this is correct in the x86 case.
  */
 
-#define smp_processor_id() (current->processor)
+static inline unsigned get_TR(void) __attribute__ ((pure));
+static inline unsigned get_TR(void)
+{
+	extern int dummy_cpu_id;
+	unsigned tr;
+	/* The PAIN!  The HORROR!
+	 * Technically this is wrong, wrong, wrong, but 
+	 * gas doesn't know about strl.  *sigh*  Please 
+	 * flog them with a wet noodle repeatedly.
+	 * The extra parameter is a dummy value to prevent
+	 * gcc from assuming that the value is const across
+	 * function calls.  Fun!  -ben
+	 */
+	__asm__ ("str %w0" : "=r" (tr) : "m" (dummy_cpu_id));
+	return tr;
+}
+
+#define smp_processor_id()	( ((get_TR() >> 3) - __FIRST_TSS_ENTRY) >> 2 )
 
 static __inline int hard_smp_processor_id(void)
 {
diff -urN kernels/2.4/v2.4.13-ac8/include/asm-i386/smpboot.h v2.4.13-ac8+tr.5/include/asm-i386/smpboot.h
--- kernels/2.4/v2.4.13-ac8/include/asm-i386/smpboot.h	Fri Nov  9 23:55:07 2001
+++ v2.4.13-ac8+tr.5/include/asm-i386/smpboot.h	Sat Nov 10 15:57:58 2001
@@ -36,21 +36,21 @@
  * Mappings between logical cpu number and logical / physical apicid
  * The first four macros are trivial, but it keeps the abstraction consistent
  */
-extern volatile int logical_apicid_2_cpu[];
-extern volatile int cpu_2_logical_apicid[];
-extern volatile int physical_apicid_2_cpu[];
-extern volatile int cpu_2_physical_apicid[];
+extern volatile int logical_apicid_to_cpu[];
+extern volatile int cpu_to_logical_apicid[];
+extern volatile int physical_apicid_to_cpu[];
+extern volatile int cpu_to_physical_apicid[];
 
-#define logical_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid]
-#define cpu_to_logical_apicid(cpu) cpu_2_logical_apicid[cpu]
-#define physical_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
-#define cpu_to_physical_apicid(cpu) cpu_2_physical_apicid[cpu]
+#define logical_apicid_to_cpu(apicid) logical_apicid_to_cpu[apicid]
+#define cpu_to_logical_apicid(cpu) cpu_to_logical_apicid[cpu]
+#define physical_apicid_to_cpu(apicid) physical_apicid_to_cpu[apicid]
+#define cpu_to_physical_apicid(cpu) cpu_to_physical_apicid[cpu]
 #ifdef CONFIG_MULTIQUAD			/* use logical IDs to bootstrap */
-#define boot_apicid_to_cpu(apicid) logical_apicid_2_cpu[apicid]
-#define cpu_to_boot_apicid(cpu) cpu_2_logical_apicid[cpu]
+#define boot_apicid_to_cpu(apicid) logical_apicid_to_cpu[apicid]
+#define cpu_to_boot_apicid(cpu) cpu_to_logical_apicid[cpu]
 #else /* !CONFIG_MULTIQUAD */		/* use physical IDs to bootstrap */
-#define boot_apicid_to_cpu(apicid) physical_apicid_2_cpu[apicid]
-#define cpu_to_boot_apicid(cpu) cpu_2_physical_apicid[cpu]
+#define boot_apicid_to_cpu(apicid) physical_apicid_to_cpu[apicid]
+#define cpu_to_boot_apicid(cpu) cpu_to_physical_apicid[cpu]
 #endif /* CONFIG_MULTIQUAD */
 
 
diff -urN kernels/2.4/v2.4.13-ac8/include/asm-i386/uaccess.h v2.4.13-ac8+tr.5/include/asm-i386/uaccess.h
--- kernels/2.4/v2.4.13-ac8/include/asm-i386/uaccess.h	Wed Nov  7 18:09:12 2001
+++ v2.4.13-ac8+tr.5/include/asm-i386/uaccess.h	Sun Nov 11 21:46:52 2001
@@ -109,7 +109,7 @@
 #define __get_user_x(size,ret,x,ptr) \
 	__asm__ __volatile__("call __get_user_" #size \
 		:"=a" (ret),"=d" (x) \
-		:"0" (ptr))
+		:"0" (ptr), "1" (current))
 
 /* Careful: we have to cast the result to the type of the pointer for sign reasons */
 #define get_user(x,ptr)							\
diff -urN kernels/2.4/v2.4.13-ac8/include/linux/per_cpu.h v2.4.13-ac8+tr.5/include/linux/per_cpu.h
--- kernels/2.4/v2.4.13-ac8/include/linux/per_cpu.h	Wed Dec 31 19:00:00 1969
+++ v2.4.13-ac8+tr.5/include/linux/per_cpu.h	Thu Nov  8 22:15:26 2001
@@ -0,0 +1,32 @@
+#ifndef __LINUX__PER_CPU__H
+#define __LINUX__PER_CPU__H
+
+#define LOG2_PER_CPU_SIZE	8
+#define PER_CPU_SIZE		(1 << LOG2_PER_CPU_SIZE)
+
+#ifndef __ASSEMBLY__
+struct task_struct;
+
+struct per_cpu_data {
+	/* Assembly code relies on curr being the first member of this 
+	 * structure.  Please change it if this gets rearranged.
+	 */
+	struct task_struct	*curr;
+	cycles_t		last_schedule;
+};
+
+union aligned_data {
+	struct per_cpu_data	data;
+	char __pad [PER_CPU_SIZE];
+
+	/* Make sure the padding is large enough by forcing an error 
+	 * if it isn't.  -ben
+	 */
+	char __pad2 [PER_CPU_SIZE - sizeof(struct per_cpu_data)];
+};
+
+extern union aligned_data aligned_data[];
+
+#define per_data(nr)	(&aligned_data[nr].data)
+#endif
+#endif
diff -urN kernels/2.4/v2.4.13-ac8/init/main.c v2.4.13-ac8+tr.5/init/main.c
--- kernels/2.4/v2.4.13-ac8/init/main.c	Tue Nov  6 20:43:28 2001
+++ v2.4.13-ac8+tr.5/init/main.c	Sat Nov 10 13:11:06 2001
@@ -635,7 +635,6 @@
  * Interrupts are still disabled. Do necessary setups, then
  * enable them
  */
-	lock_kernel();
 	printk(linux_banner);
 	setup_arch(&command_line);
 	printk("Kernel command line: %s\n", saved_command_line);
@@ -646,6 +645,13 @@
 	softirq_init();
 	time_init();
 
+	/* At the very least, this has to come after trap_init as x86
+	 * needs to perform CPU setup before current is valid.  This 
+	 * should be okay as we're still running with interrupts disabled 
+	 * and no other CPUs are up yet.  -ben
+	 */
+	lock_kernel();
+
 	/*
 	 * HACK ALERT! This is early. We're enabling the console before
 	 * we've done PCI setups etc, and console_init() must be aware of
diff -urN kernels/2.4/v2.4.13-ac8/kernel/ksyms.c v2.4.13-ac8+tr.5/kernel/ksyms.c
--- kernels/2.4/v2.4.13-ac8/kernel/ksyms.c	Tue Nov  6 20:43:28 2001
+++ v2.4.13-ac8+tr.5/kernel/ksyms.c	Sat Nov 10 16:57:06 2001
@@ -447,6 +447,7 @@
 #endif
 EXPORT_SYMBOL(kstat);
 EXPORT_SYMBOL(nr_running);
+EXPORT_SYMBOL(aligned_data);
 
 /* misc */
 EXPORT_SYMBOL(panic);
diff -urN kernels/2.4/v2.4.13-ac8/kernel/sched.c v2.4.13-ac8+tr.5/kernel/sched.c
--- kernels/2.4/v2.4.13-ac8/kernel/sched.c	Tue Nov  6 20:43:28 2001
+++ v2.4.13-ac8+tr.5/kernel/sched.c	Sat Nov 10 16:29:58 2001
@@ -28,6 +28,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/completion.h>
 #include <linux/prefetch.h>
+#include <linux/per_cpu.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -97,16 +98,10 @@
  * We align per-CPU scheduling data on cacheline boundaries,
  * to prevent cacheline ping-pong.
  */
-static union {
-	struct schedule_data {
-		struct task_struct * curr;
-		cycles_t last_schedule;
-	} schedule_data;
-	char __pad [SMP_CACHE_BYTES];
-} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};
+union aligned_data aligned_data[NR_CPUS] __cacheline_aligned;
 
-#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
-#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
+#define cpu_curr(cpu)		per_data(cpu)->curr
+#define last_schedule(cpu)	per_data(cpu)->last_schedule
 
 struct kernel_stat kstat;
 extern struct task_struct *child_reaper;
@@ -532,7 +527,7 @@
  */
 asmlinkage void schedule(void)
 {
-	struct schedule_data * sched_data;
+	struct per_cpu_data * sched_data;
 	struct task_struct *prev, *next, *p;
 	struct list_head *tmp;
 	int this_cpu, c;
@@ -543,7 +538,7 @@
 	if (!current->active_mm) BUG();
 need_resched_back:
 	prev = current;
-	this_cpu = prev->processor;
+	this_cpu = smp_processor_id();	/* This is better than current->processor on UP */
 
 	if (in_interrupt())
 		goto scheduling_in_interrupt;
@@ -554,7 +549,7 @@
 	 * 'sched_data' is protected by the fact that we can run
 	 * only one process per CPU.
 	 */
-	sched_data = & aligned_data[this_cpu].schedule_data;
+	sched_data = per_data(this_cpu);
 
 	spin_lock_irq(&runqueue_lock);
 
@@ -1057,7 +1052,7 @@
 	// Subtract non-idle processes running on other CPUs.
 	for (i = 0; i < smp_num_cpus; i++) {
 		int cpu = cpu_logical_map(i);
-		if (aligned_data[cpu].schedule_data.curr != idle_task(cpu))
+		if (per_data(cpu)->curr != idle_task(cpu))
 			nr_pending--;
 	}
 #else
@@ -1309,17 +1304,18 @@
 
 void __init init_idle(void)
 {
-	struct schedule_data * sched_data;
-	sched_data = &aligned_data[smp_processor_id()].schedule_data;
+	struct per_cpu_data * sched_data;
+	int cpu = smp_processor_id();
+	sched_data = per_data(cpu);
 
 	if (current != &init_task && task_on_runqueue(current)) {
 		printk("UGH! (%d:%d) was on the runqueue, removing.\n",
-			smp_processor_id(), current->pid);
+			cpu, current->pid);
 		del_from_runqueue(current);
 	}
 	sched_data->curr = current;
 	sched_data->last_schedule = get_cycles();
-	clear_bit(current->processor, &wait_init_idle);
+	clear_bit(cpu, &wait_init_idle);
 }
 
 extern void init_timervecs (void);
@@ -1334,6 +1330,7 @@
 	int nr;
 
 	init_task.processor = cpu;
+	set_current(&init_task);
 
 	for(nr = 0; nr < PIDHASH_SZ; nr++)
 		pidhash[nr] = NULL;

      reply	other threads:[~2001-11-12  3:33 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20011108190546.A29741@redhat.com>
2001-11-09  2:11 ` [PATCH] take 2 of the tr-based current Benjamin LaHaise
2001-11-09  9:13   ` Jakub Jelinek
2001-11-09 14:54     ` Manfred Spraul
2001-11-09 15:03       ` David S. Miller
2001-11-09 15:33         ` Manfred Spraul
2001-11-09 16:01           ` Richard B. Johnson
2001-11-11  0:01           ` Anton Blanchard
2001-11-11  1:01             ` Benjamin LaHaise
2001-11-11  2:27               ` Anton Blanchard
2001-11-11  9:59             ` Manfred Spraul
2001-11-11 12:36               ` Anton Blanchard
2001-11-11 14:02                 ` Manfred Spraul
2001-11-12  3:32                   ` Benjamin LaHaise [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20011111223249.B31746@redhat.com \
    --to=bcrl@redhat.com \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=anton@samba.org \
    --cc=arjanv@redhat.com \
    --cc=davem@redhat.com \
    --cc=jakub@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=manfred@colorfullife.com \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.