public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Support for asymmetric SMP
@ 2002-03-11  3:34 Kurt Garloff
  2002-03-11  4:29 ` Andrea Arcangeli
  2002-03-12 22:01 ` Frank van de Pol
  0 siblings, 2 replies; 8+ messages in thread
From: Kurt Garloff @ 2002-03-11  3:34 UTC (permalink / raw)
  To: Linux kernel list; +Cc: S. Chandra Sekharan


[-- Attachment #1.1: Type: text/plain, Size: 1131 bytes --]

Hi,

Some time ago (around 2.4.2), I created a patch that allows running Linux on
a multiprocessor system whose ix86 CPUs run at different speeds.

The patch does the following:
* Make sure we get the feature flags right (in case they differ between CPUs)
  before we enable XMM/FXSR/.... "Right" means the common subset of the
  features supported by all CPUs.
* Make cpu_khz a per-CPU field and use it in fast_get_timeofday(). The
  offset from the last timer tick must also be per CPU.

The patch works fine, but it has a limited scope: It does not try to teach
the scheduler about the fact that the CPUs are different. So a process that
runs on the slower CPU does not get any bonus. It's just unlucky ...
(Some dynamic priority weighting by cpu_khz in the goodness calculation
 should not be too hard, but I wanted to keep things simple.)

I attach the patch against 2.4.16.

Chandra, this is what you've been looking for, right?

Enjoy!
-- 
Kurt Garloff  <garloff@suse.de>                          Eindhoven, NL
GPG key: See mail header, key servers         Linux kernel development
SuSE Linux AG, Nuernberg, DE                            SCSI, Security

[-- Attachment #1.2: 2.4.16-AMP-support.diff --]
[-- Type: text/plain, Size: 13370 bytes --]

--- ./arch/i386/kernel/nmi.c.orig	Fri Jan 18 22:39:20 2002
+++ ./arch/i386/kernel/nmi.c	Sat Mar  9 15:10:30 2002
@@ -161,6 +161,8 @@
 {
 	int i;
 	unsigned int evntsel;
+	int cpu = smp_processor_id();
+	struct cpuinfo_x86 *c = &cpu_data[cpu];
 
 	nmi_perfctr_msr = MSR_K7_PERFCTR0;
 
@@ -175,8 +177,8 @@
 		| K7_NMI_EVENT;
 
 	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
-	Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
-	wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
+	Dprintk("setting K7_PERFCTR0 to %08lx\n", -(c->cpu_khz/nmi_hz*1000));
+	wrmsr(MSR_K7_PERFCTR0, -(c->cpu_khz/nmi_hz*1000), -1);
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= K7_EVNTSEL_ENABLE;
 	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
@@ -186,6 +188,8 @@
 {
 	int i;
 	unsigned int evntsel;
+	int cpu = smp_processor_id();
+	struct cpuinfo_x86 *c = &cpu_data[cpu];
 
 	nmi_perfctr_msr = MSR_IA32_PERFCTR0;
 
@@ -200,8 +204,8 @@
 		| P6_NMI_EVENT;
 
 	wrmsr(MSR_IA32_EVNTSEL0, evntsel, 0);
-	Dprintk("setting IA32_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
-	wrmsr(MSR_IA32_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
+	Dprintk("setting IA32_PERFCTR0 to %08lx\n", -(c->cpu_khz/nmi_hz*1000));
+	wrmsr(MSR_IA32_PERFCTR0, -(c->cpu_khz/nmi_hz*1000), 0);
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= P6_EVNTSEL0_ENABLE;
 	wrmsr(MSR_IA32_EVNTSEL0, evntsel, 0);
@@ -267,6 +271,7 @@
 	 * the stack NMI-atomically, it's safe to use smp_processor_id().
 	 */
 	int sum, cpu = smp_processor_id();
+	struct cpuinfo_x86 *c = &cpu_data[cpu];
 
 	sum = apic_timer_irqs[cpu];
 
@@ -296,5 +301,5 @@
 		alert_counter[cpu] = 0;
 	}
 	if (nmi_perfctr_msr)
-		wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
+		wrmsr(nmi_perfctr_msr, -(c->cpu_khz/nmi_hz*1000), -1);
 }
--- ./arch/i386/kernel/i387.c.orig	Fri Feb 23 19:09:08 2001
+++ ./arch/i386/kernel/i387.c	Sat Mar  9 14:26:15 2002
@@ -28,7 +28,7 @@
  * The _current_ task is using the FPU for the first time
  * so initialize it and set the mxcsr to its default
  * value at reset if we support XMM instructions and then
- * remeber the current task has used the FPU.
+ * remember the current task has used the FPU.
  */
 void init_fpu(void)
 {
--- ./arch/i386/kernel/mpparse.c.orig	Fri Nov  9 23:58:18 2001
+++ ./arch/i386/kernel/mpparse.c	Sat Mar  9 14:30:33 2002
@@ -135,6 +135,8 @@
 static int mpc_record; 
 static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
 
+unsigned long mp_max_features = 0xffffffff;
+
 void __init MP_processor_info (struct mpc_config_processor *m)
 {
  	int ver, quad, logical_apicid;
@@ -215,6 +217,9 @@
 	}
 
 	num_processors++;
+
+	/* Clear features that are not present on all CPUs */
+	mp_max_features &= m->mpc_featureflag;
 
 	if (m->mpc_apicid > MAX_APICS) {
 		printk("Processor #%d INVALID. (Max ID: %d).\n",
--- ./arch/i386/kernel/setup.c.orig	Sat Dec 22 14:08:42 2001
+++ ./arch/i386/kernel/setup.c	Sat Mar  9 15:03:10 2002
@@ -121,6 +121,9 @@
 struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
 
 unsigned long mmu_cr4_features;
+#ifdef CONFIG_SMP
+extern unsigned long mp_max_features;
+#endif
 
 /*
  * Bus types ..
@@ -160,6 +163,7 @@
 
 static int disable_x86_serial_nr __initdata = 1;
 static int disable_x86_fxsr __initdata = 0;
+static int disable_x86_xmm __initdata = 0;
 
 extern int blk_nohighio;
 
@@ -2372,6 +2376,11 @@
 }
 __setup("nofxsr", x86_fxsr_setup);
 
+int __init x86_xmm_setup(char* s)
+{	disable_x86_xmm = 1;
+	return 1;
+}
+__setup("noxmm", x86_xmm_setup);
 
 /* Standard macro to see if a specific flag is changeable */
 static inline int flag_is_changeable_p(u32 flag)
@@ -2479,6 +2488,8 @@
 	return have_cpuid_p();	/* Check to see if CPUID now enabled? */
 }
 
+
+extern void time_init_cpu(struct cpuinfo_x86 *);
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
@@ -2626,6 +2637,10 @@
 		clear_bit(X86_FEATURE_XMM, &c->x86_capability);
 	}
 
+	/* XMM/SSE disabled? */
+	if (disable_x86_xmm)
+		clear_bit(X86_FEATURE_XMM, &c->x86_capability);
+	
 	/* Disable the PN if appropriate */
 	squash_the_stupid_serial_number(c);
 
@@ -2659,7 +2674,12 @@
 		/* AND the already accumulated flags with these */
 		for ( i = 0 ; i < NCAPINTS ; i++ )
 			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+		time_init_cpu (c);
 	}
+#if 0 //def CONFIG_SMP
+	else
+		c->x86_capability[0] &= mp_max_features;
+#endif
 
 	printk(KERN_DEBUG "CPU:             Common caps: %08x %08x %08x %08x\n",
 	       boot_cpu_data.x86_capability[0],
@@ -2772,7 +2792,7 @@
 
 	if ( test_bit(X86_FEATURE_TSC, &c->x86_capability) ) {
 		seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n",
-			cpu_khz / 1000, (cpu_khz % 1000));
+			c->cpu_khz / 1000, (c->cpu_khz % 1000));
 	}
 
 	/* Cache size */
--- ./arch/i386/kernel/smpboot.c.orig	Fri Jan 18 22:39:20 2002
+++ ./arch/i386/kernel/smpboot.c	Sat Mar  9 14:41:03 2002
@@ -30,6 +30,7 @@
  *		Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
  *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs
  *		Martin J. Bligh	: 	Added support for multi-quad systems
+ *		Kurt Garloff	:	Support for SMP with non-identical CPUs
  */
 
 #include <linux/config.h>
@@ -46,6 +47,7 @@
 #include <asm/mtrr.h>
 #include <asm/pgalloc.h>
 #include <asm/smpboot.h>
+#include <asm/bugs.h>
 
 /* Set if we find a B stepping CPU			*/
 static int smp_b_stepping;
@@ -195,7 +197,7 @@
 
 #define NR_LOOPS 5
 
-extern unsigned long fast_gettimeoffset_quotient;
+//extern unsigned long fast_gettimeoffset_quotient;
 
 /*
  * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit
@@ -236,7 +238,7 @@
 
 	printk("checking TSC synchronization across CPUs: ");
 
-	one_usec = ((1<<30)/fast_gettimeoffset_quotient)*(1<<2);
+	one_usec = ((1<<30)/current_cpu_data.fast_gettimeoffset_quotient)*(1<<2);
 
 	atomic_set(&tsc_start_flag, 1);
 	wmb();
@@ -339,6 +341,38 @@
 }
 #undef NR_LOOPS
 
+/* Note: boot_cpu_data caps on SMP machines are filled in as follows:
+ * (1) Initialized with the boot CPU's caps (head.S)
+ * (2) AND with the common caps from the MP table (identify_cpu())
+ * (3) AND with the caps from the started CPUs (identify_cpu())
+ * (4) Finally calculated and overwritten here (smp_make_common_caps())
+ * Step 2 is actually disabled, as the info is not very reliable.
+ * Step 4 should not be needed then, actually.
+ * KG <garloff@suse.de>, 2001-03-21
+ */
+
+/* Store common subset of capabilities in boot_cpu_data */
+void smp_make_common_caps(void)
+{
+	int cpu, c;
+	unsigned long x86cap[NCAPINTS];
+	for (c = 0; c < NCAPINTS; c++)
+		x86cap[c] = 0xffffffff;
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		if (!(cpu_online_map & (1<<cpu)))
+			continue;
+		for (c = 0; c < NCAPINTS; c++)
+			x86cap[c] &= cpu_data[cpu].x86_capability[c];
+	}
+	for (c = 0; c < NCAPINTS; c++)
+		boot_cpu_data.x86_capability[c] = x86cap[c];
+	printk("CPU: Common caps: %08x %08x %08x %08x\n",
+	       boot_cpu_data.x86_capability[0],
+	       boot_cpu_data.x86_capability[1],
+	       boot_cpu_data.x86_capability[2],
+	       boot_cpu_data.x86_capability[3]);
+}
+
 extern void calibrate_delay(void);
 
 static atomic_t init_deasserted;
@@ -925,6 +959,7 @@
 {
 	unsigned long cachesize;       /* kB   */
 	unsigned long bandwidth = 350; /* MB/s */
+	unsigned long cpu_khz	= boot_cpu_data.cpu_khz;
 	/*
 	 * Rough estimation for SMP scheduling, this is the number of
 	 * cycles it takes for a fully memory-limited process to flush
@@ -981,6 +1016,12 @@
                         (u_long) xquad_portio, (u_long) XQUAD_PORTIO_LEN);
         }
 
+	/* Pieces from check_bugs() ... */
+	identify_cpu(&boot_cpu_data);
+	printk("Boot CPU: ");
+	print_cpu_info(&boot_cpu_data);
+	check_config();
+	
 #ifdef CONFIG_MTRR
 	/*  Must be done before other processors booted  */
 	mtrr_init_boot_cpu ();
@@ -1181,5 +1222,6 @@
 		synchronize_tsc_bp();
 
 smp_done:
+	smp_make_common_caps ();
 	zap_low_mappings();
 }
--- ./arch/i386/kernel/time.c.orig	Sun Nov 11 19:20:21 2001
+++ ./arch/i386/kernel/time.c	Sat Mar  9 14:26:15 2002
@@ -64,19 +64,19 @@
 #include <linux/irq.h>
 
 
-unsigned long cpu_khz;	/* Detected as we calibrate the TSC */
+//unsigned long cpu_khz;	/* Detected as we calibrate the TSC */
 
 /* Number of usecs that the last interrupt was delayed */
 static int delay_at_last_interrupt;
 
-static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
+//static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
 
 /* Cached *multiplier* to convert TSC counts to microseconds.
  * (see the equation below).
  * Equal to 2^32 * (1 / (clocks per usec) ).
  * Initialized in time_init.
  */
-unsigned long fast_gettimeoffset_quotient;
+//unsigned long fast_gettimeoffset_quotient;
 
 extern rwlock_t xtime_lock;
 extern unsigned long wall_jiffies;
@@ -92,7 +92,7 @@
 	rdtsc(eax,edx);
 
 	/* .. relative to previous jiffy (32 bits is enough) */
-	eax -= last_tsc_low;	/* tsc_low delta */
+	eax -= current_cpu_data.last_tsc_low;	/* tsc_low delta */
 
 	/*
          * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
@@ -105,7 +105,7 @@
 
 	__asm__("mull %2"
 		:"=a" (eax), "=d" (edx)
-		:"rm" (fast_gettimeoffset_quotient),
+		:"rm" (current_cpu_data.fast_gettimeoffset_quotient),
 		 "0" (eax));
 
 	/* our adjusted time offset in microseconds */
@@ -275,8 +275,8 @@
 		if (lost)
 			usec += lost * (1000000 / HZ);
 	}
-	sec = xtime.tv_sec;
-	usec += xtime.tv_usec;
+	sec = current_cpu_data.xtime.tv_sec;
+	usec += current_cpu_data.xtime.tv_usec;
 	read_unlock_irqrestore(&xtime_lock, flags);
 
 	while (usec >= 1000000) {
@@ -440,6 +440,7 @@
 		else
 			last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
 	}
+	current_cpu_data.xtime = xtime;
 	    
 #ifdef CONFIG_MCA
 	if( MCA_bus ) {
@@ -494,7 +495,7 @@
 	
 		/* read Pentium cycle counter */
 
-		rdtscl(last_tsc_low);
+		rdtscl(current_cpu_data.last_tsc_low);
 
 		spin_lock(&i8253_lock);
 		outb_p(0x00, 0x43);     /* latch the count ASAP */
@@ -597,7 +598,7 @@
 		} while ((inb(0x61) & 0x20) == 0);
 		rdtsc(endlow,endhigh);
 
-		last_tsc_low = endlow;
+		current_cpu_data.last_tsc_low = endlow;
 
 		/* Error: ECTCNEVERSET */
 		if (count <= 1)
@@ -634,7 +635,8 @@
 	return 0;
 }
 
-void __init time_init(void)
+
+void __init time_init_cpu(struct cpuinfo_x86 *c)
 {
 	extern int x86_udelay_tsc;
 	
@@ -671,7 +673,8 @@
 	if (cpu_has_tsc) {
 		unsigned long tsc_quotient = calibrate_tsc();
 		if (tsc_quotient) {
-			fast_gettimeoffset_quotient = tsc_quotient;
+			unsigned long cpu_khz;
+			c->fast_gettimeoffset_quotient = tsc_quotient;
 			use_tsc = 1;
 			/*
 			 *	We could be more selective here I suspect
@@ -694,6 +697,7 @@
 	                	"0" (eax), "1" (edx));
 				printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000);
 			}
+			c->cpu_khz = cpu_khz;
 		}
 	}
 
@@ -714,4 +718,11 @@
 #else
 	setup_irq(0, &irq0);
 #endif
+}
+
+extern struct cpuinfo_x86 boot_cpu_data;
+
+void inline __init time_init (void)
+{
+	time_init_cpu (&boot_cpu_data);
 }
--- ./init/main.c.orig	Fri Jan 18 22:39:20 2002
+++ ./init/main.c	Sat Mar  9 14:42:03 2002
@@ -608,7 +608,6 @@
 #if defined(CONFIG_SYSVIPC)
 	ipc_init();
 #endif
-	check_bugs();
 
 	/*
 	 *      We count on the initial thread going ok
@@ -616,6 +615,7 @@
 	 *      make syscalls (and thus be locked).
 	 */
 	smp_init();
+	check_bugs();
 
 	/*
 	 * Finally, we wait for all other CPU's, and initialize this
--- ./include/asm-i386/bugs.h.orig	Mon Mar  4 00:43:34 2002
+++ ./include/asm-i386/bugs.h	Sat Mar  9 14:47:09 2002
@@ -21,6 +21,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/utsname.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/msr.h>
@@ -204,12 +205,16 @@
 
 static void __init check_bugs(void)
 {
-	identify_cpu(&boot_cpu_data);
+	/* identify_cpu(&boot_cpu_data)  and
+	 * check_config()
+	 * are called in smp_boot_cpus() on SMP kernels
+	 */
 #ifndef CONFIG_SMP
+	identify_cpu(&boot_cpu_data);
 	printk("CPU: ");
 	print_cpu_info(&boot_cpu_data);
-#endif
 	check_config();
+#endif
 	check_fpu();
 	check_hlt();
 	check_popad();
--- ./include/asm-i386/processor.h.orig	Mon Mar  4 00:43:33 2002
+++ ./include/asm-i386/processor.h	Sat Mar  9 14:28:55 2002
@@ -17,6 +17,7 @@
 #include <linux/cache.h>
 #include <linux/config.h>
 #include <linux/threads.h>
+#include <linux/time.h>
 
 /*
  * Default implementation of macro that returns current
@@ -53,6 +54,10 @@
 	unsigned long *pmd_quick;
 	unsigned long *pte_quick;
 	unsigned long pgtable_cache_sz;
+	unsigned long cpu_khz;
+	unsigned long fast_gettimeoffset_quotient;
+	unsigned long last_tsc_low;
+	struct timeval xtime;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define X86_VENDOR_INTEL 0
--- ./include/asm-i386/timex.h.orig	Fri Jan 18 23:05:37 2002
+++ ./include/asm-i386/timex.h	Sat Mar  9 14:42:47 2002
@@ -45,6 +45,6 @@
 #endif
 }
 
-extern unsigned long cpu_khz;
+//extern unsigned long cpu_khz;
 
 #endif

[-- Attachment #2: Type: application/pgp-signature, Size: 232 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2002-03-13 16:40 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-03-11  3:34 [PATCH] Support for asymmetric SMP Kurt Garloff
2002-03-11  4:29 ` Andrea Arcangeli
2002-03-11 11:25   ` Kurt Garloff
2002-03-11 12:20     ` Andrea Arcangeli
2002-03-12 15:05       ` Eric W. Biederman
2002-03-13 16:17         ` Alan Cox
2002-03-13 16:40           ` Martin J. Bligh
2002-03-12 22:01 ` Frank van de Pol

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox