public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Allow all Opteron processors to change pstate at same time
@ 2006-07-06 20:31 Mark Langsdorf
  2006-07-07 12:10 ` Andi Kleen
  0 siblings, 1 reply; 31+ messages in thread
From: Mark Langsdorf @ 2006-07-06 20:31 UTC (permalink / raw)
  To: linux-kernel; +Cc: mark.langsdorf

[-- Attachment #1: Type: text/plain, Size: 686 bytes --]

The current generation of Opteron processors do not provide a frequency 
independent TSC.  This causes wild gettimeofday skew on systems that 
enable cpufreq while using TSC as a gtod source.

This patch provides a workaround by changing all processors to the same 
frequency at the same time, so that the TSC on each processor never 
increments at a different rate than the TSC on another processor.

the "powernow-k8.tscsync=1" options enables simeltameous transitions.  
Other options are necessary to force the use of TSC as a gtod source.

This patch should apply cleanly to the 2.6.18-rc1 kernel.

-Mark Langsdorf
AMD, Inc.

signed-off-by: Mark Langsdorf <mark.langsdorf@amd.com>

[-- Attachment #2: Type: text/x-patch, Size: 7336 bytes --]

--- linux-mm2-2.6.17-rc6/arch/i386/kernel/cpu/cpufreq/powernow-k8.c.old	2006-06-12 12:22:06.000000000 -0500
+++ linux-mm2-2.6.17-rc6/arch/i386/kernel/cpu/cpufreq/powernow-k8.c	2006-06-13 15:43:39.000000000 -0500
@@ -46,13 +46,15 @@
 
 #define PFX "powernow-k8: "
 #define BFX PFX "BIOS error: "
-#define VERSION "version 2.00.00"
+#define VERSION "version 2.10.00"
 #include "powernow-k8.h"
 
 /* serialize freq changes  */
 static DEFINE_MUTEX(fidvid_mutex);
 
 static struct powernow_k8_data *powernow_data[NR_CPUS];
+static int *req_state = NULL;
+static int tscsync __initdata = 0;
 
 static int cpu_family = CPU_OPTERON;
 
@@ -205,6 +207,17 @@ static int write_new_fid(struct powernow
 	dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
 		fid, lo, data->plllock * PLL_LOCK_CONVERSION);
 
+	if (tscsync) {
+		int i;
+		cpumask_t oldmask = current->cpus_allowed;
+		for_each_online_cpu(i) {
+			set_cpus_allowed(current, cpumask_of_cpu(i));
+			schedule();
+			wrmsr(MSR_FIDVID_CTL, lo & ~MSR_C_LO_INIT_FID_VID, data->plllock * PLL_LOCK_CONVERSION);
+		}
+		set_cpus_allowed(current, oldmask);
+		schedule();
+	}
 	do {
 		wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
 		if (i++ > 100) {
@@ -247,6 +260,17 @@ static int write_new_vid(struct powernow
 	dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
 		vid, lo, STOP_GRANT_5NS);
 
+	if (tscsync) {
+		int i;
+		cpumask_t oldmask = current->cpus_allowed;
+		for_each_online_cpu(i) {
+			set_cpus_allowed(current, cpumask_of_cpu(i));
+			schedule();
+			wrmsr(MSR_FIDVID_CTL, lo & ~MSR_C_LO_INIT_FID_VID, STOP_GRANT_5NS);
+		}
+		set_cpus_allowed(current, oldmask);
+		schedule();
+	}
 	do {
 		wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
 		if (i++ > 100) {
@@ -386,7 +410,8 @@ static int core_frequency_transition(str
 	}
 
 	if (data->currfid == reqfid) {
-		printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid);
+		if (!tscsync)
+			printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", data->currfid);
 		return 0;
 	}
 
@@ -960,9 +985,21 @@ static int transition_frequency_fidvid(s
 	u32 vid = 0;
 	int res, i;
 	struct cpufreq_freqs freqs;
+	cpumask_t changing_cores;
 
 	dprintk("cpu %d transition to index %u\n", smp_processor_id(), index);
 
+	/* if all processors are transitioning in step, find the highest
+	 * current state and go to that
+         */
+
+	if (tscsync && req_state) {
+		req_state[smp_processor_id()] = index;
+		for_each_online_cpu(i) 
+			if (req_state[i] < index)
+				index = req_state[i];
+	}
+
 	/* fid/vid correctness check for k8 */
 	/* fid are the lower 8 bits of the index we stored into
 	 * the cpufreq frequency table in find_psb_table, vid
@@ -983,6 +1020,8 @@ static int transition_frequency_fidvid(s
 	}
 
 	if ((fid < HI_FID_TABLE_BOTTOM) && (data->currfid < HI_FID_TABLE_BOTTOM)) {
+		if (tscsync && (data->currfid == fid))
+			return 0;
 		printk(KERN_ERR PFX
 		       "ignoring illegal change in lo freq table-%x to 0x%x\n",
 		       data->currfid, fid);
@@ -994,7 +1033,11 @@ static int transition_frequency_fidvid(s
 	freqs.old = find_khz_freq_from_fid(data->currfid);
 	freqs.new = find_khz_freq_from_fid(fid);
 
-	for_each_cpu_mask(i, *(data->available_cores)) {
+	if (tscsync)
+		changing_cores = cpu_online_map;
+	else
+		changing_cores = *(data->available_cores);
+	for_each_cpu_mask(i, changing_cores) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
@@ -1002,10 +1045,16 @@ static int transition_frequency_fidvid(s
 	res = transition_fid_vid(data, fid, vid);
 	freqs.new = find_khz_freq_from_fid(data->currfid);
 
-	for_each_cpu_mask(i, *(data->available_cores)) {
+	for_each_cpu_mask(i, changing_cores) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
+	if (tscsync) 
+		for_each_online_cpu(i)
+			if (powernow_data[i]) {
+				powernow_data[i]->currfid = data->currfid;
+				powernow_data[i]->currvid = data->currvid;
+			}
 	return res;
 }
 
@@ -1054,7 +1103,7 @@ static int powernowk8_target(struct cpuf
 	u32 checkfid;
 	u32 checkvid;
 	unsigned int newstate;
-	int ret = -EIO;
+	int ret = 0;//-EIO;
 
 	if (!data)
 		return -EINVAL;
@@ -1089,7 +1138,7 @@ static int powernowk8_target(struct cpuf
 		dprintk("targ: curr fid 0x%x, vid 0x%x\n",
 		data->currfid, data->currvid);
 
-		if ((checkvid != data->currvid) || (checkfid != data->currfid)) {
+		if (!tscsync && ((checkvid != data->currvid) || (checkfid != data->currfid))) {
 			printk(KERN_INFO PFX
 				"error - out of sync, fix 0x%x 0x%x, vid 0x%x 0x%x\n",
 				checkfid, data->currfid, checkvid, data->currvid);
@@ -1100,7 +1149,6 @@ static int powernowk8_target(struct cpuf
 		goto err_out;
 
 	mutex_lock(&fidvid_mutex);
-
 	powernow_k8_acpi_pst_values(data, newstate);
 
 	if (cpu_family)
@@ -1137,6 +1185,32 @@ static int powernowk8_verify(struct cpuf
 	return cpufreq_frequency_table_verify(pol, data->powernow_table);
 }
 
+/* On an MP system that is transitioning all cores in sync, adjust the
+ * vids for each frequency to the highest.  Otherwise, systems made up
+ * of different steppings may fail.
+ */
+static void sync_tables(int curcpu)
+{
+	int j;
+	for (j = 0; j < powernow_data[curcpu]->numps; j++) {
+		int i;
+		int maxvid = 0;
+		for_each_online_cpu(i) {
+			int testvid;
+			if (!powernow_data[i] || !powernow_data[i]->powernow_table)
+				continue;
+			testvid = powernow_data[i]->powernow_table[j].index & 0xff00;
+			if (testvid > maxvid)
+				maxvid = testvid;
+		}	
+		for_each_online_cpu(i) {
+			if (!powernow_data[i] || ! powernow_data[i]->powernow_table)
+				continue;
+			powernow_data[i]->powernow_table[j].index &= 0xff;
+			powernow_data[i]->powernow_table[j].index |= maxvid;
+		}
+	}
+}
 /* per CPU init entry point to the driver */
 static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
@@ -1200,6 +1274,9 @@ static int __cpuinit powernowk8_cpu_init
 	if (!cpu_family)
 		fidvid_msr_init();
 
+	if (tscsync && (cpu_family == CPU_OPTERON))
+		req_state[pol->cpu] = 0;
+
 	/* run on any CPU again */
 	set_cpus_allowed(current, oldmask);
 
@@ -1241,6 +1318,8 @@ static int __cpuinit powernowk8_cpu_init
 
 	powernow_data[pol->cpu] = data;
 
+	if (tscsync && (cpu_family == CPU_OPTERON))
+		sync_tables(pol->cpu);
 	return 0;
 
 err_out:
@@ -1323,6 +1402,13 @@ static int __cpuinit powernowk8_init(voi
 	}
 
 	if (supported_cpus == num_online_cpus()) {
+		if (tscsync) {
+			req_state = kzalloc(sizeof(int)*NR_CPUS, GFP_KERNEL);
+			if (!req_state) {
+				printk(KERN_ERR PFX "Unable to allocate memory!\n");
+				return -ENOMEM;
+			}
+		}
 		printk(KERN_INFO PFX "Found %d %s "
 			"processors (" VERSION ")\n", supported_cpus,
 			boot_cpu_data.x86_model_id);
@@ -1337,6 +1423,9 @@ static void __exit powernowk8_exit(void)
 {
 	dprintk("exit\n");
 
+	if (tscsync)
+		kfree(req_state);
+
 	cpufreq_unregister_driver(&cpufreq_amd64_driver);
 }
 
@@ -1346,3 +1435,6 @@ MODULE_LICENSE("GPL");
 
 late_initcall(powernowk8_init);
 module_exit(powernowk8_exit);
+
+module_param(tscsync, int, 0);
+MODULE_PARM_DESC(tscsync, "enable tsc by synchronizing powernow-k8 changes");

^ permalink raw reply	[flat|nested] 31+ messages in thread
* RE: [discuss] Re: [PATCH] Allow all Opteron processors to change pstate at same time
@ 2006-07-12 16:11 shin, jacob
  2006-07-12 16:14 ` Langsdorf, Mark
  2006-07-13 13:06 ` Pavel Machek
  0 siblings, 2 replies; 31+ messages in thread
From: shin, jacob @ 2006-07-12 16:11 UTC (permalink / raw)
  To: Deguara, Joachim, Andi Kleen
  Cc: Langsdorf, Mark, discuss, linux-kernel, cpufreq

On Wednesday, July 12, 2006 9:06 AM Joachim Deguara wrote:

> Here are the further findings after letting the machine toggle between
> 1GHz and 2.2Ghz every two seconds for roughly 24 hours.  Unfortunately
> there is an oops after bringing CPU2 online and CPU3 will not come
> online.  Still the differences in TSC are not bad:

Can I get more information on how to reproduce the Oops? Kernel version?
.config? your hardware?

I have run basic set of CPU Hotplug on/offline tests, and I could not
reproduce it..

-Jacob Shin
AMD, Inc.



^ permalink raw reply	[flat|nested] 31+ messages in thread
* RE: [discuss] Re: [PATCH] Allow all Opteron processors to change pstate at same time
@ 2006-07-13 13:37 Bhavana Nagendra
  0 siblings, 0 replies; 31+ messages in thread
From: Bhavana Nagendra @ 2006-07-13 13:37 UTC (permalink / raw)
  To: linux-kernel

Here are some results without changing frequencies on a system whose 
BIOS does not support Power Now! on MP systems:

Basically the system booted up with "nohpet, nopmtimer"i.e. using TSC as 
the GTOD time source and system stayed idle for 13 hours.   There 
appears to be drift of 20 secs in the CPU 2 readings.    This TSC drift 
will be worse when
the system is active and doing GTOD operations.

CPU 2: Syncing TSC to CPU 0.
CPU 2: synchronized TSC with CPU 0 (last diff -108 cycles, maxerr 826 
cycles)
CPU 3: Syncing TSC to CPU 0.
CPU 3: synchronized TSC with CPU 0 (last diff -119 cycles, maxerr 845 
cycles)

*** CPUs go offline ***

*** back online ***

CPU 2: Syncing TSC to CPU 0.
CPU 2: synchronized TSC with CPU 0 (last diff -117 cycles, maxerr 846 
cycles)
CPU 3: Syncing TSC to CPU 0.
CPU 3: synchronized TSC with CPU 0 (last diff -117 cycles, maxerr 845 
cycles)

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2006-07-26 18:59 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-07-06 20:31 [PATCH] Allow all Opteron processors to change pstate at same time Mark Langsdorf
2006-07-07 12:10 ` Andi Kleen
2006-07-07 17:36   ` [discuss] " Langsdorf, Mark
2006-07-10 12:45     ` Joachim Deguara
2006-07-10 13:02       ` Joachim Deguara
2006-07-11 12:55   ` Joachim Deguara
2006-07-11 13:07     ` Andi Kleen
2006-07-11 13:14       ` Arjan van de Ven
2006-07-11 16:15         ` Alan Cox
2006-07-11 16:01           ` Arjan van de Ven
2006-07-11 16:04           ` Andi Kleen
2006-07-11 13:31       ` Langsdorf, Mark
2006-07-11 13:34         ` Arjan van de Ven
2006-07-11 13:51           ` Andi Kleen
2006-07-11 13:14     ` [OT] Evolution use (was: Re: [discuss] Re: [PATCH] Allow all Opteron processors to change pstate at same time) Xavier Bestel
2006-07-12 14:06     ` [discuss] Re: [PATCH] Allow all Opteron processors to change pstate at same time Joachim Deguara
2006-07-12 14:54       ` Andi Kleen
2006-07-25 21:47   ` Langsdorf, Mark
2006-07-26 11:31     ` Joachim Deguara
2006-07-26 16:42   ` Langsdorf, Mark
2006-07-26 16:54     ` Andi Kleen
2006-07-26 18:34       ` Langsdorf, Mark
2006-07-26 18:53         ` Andi Kleen
  -- strict thread matches above, loose matches on Subject: below --
2006-07-12 16:11 shin, jacob
2006-07-12 16:14 ` Langsdorf, Mark
2006-07-13 13:06 ` Pavel Machek
2006-07-13 14:32   ` Joachim Deguara
2006-07-16  1:56     ` Pavel Machek
2006-07-17  7:37       ` Joachim Deguara
2006-07-20 15:59         ` Pavel Machek
2006-07-13 13:37 Bhavana Nagendra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox