From: Ingo Molnar <mingo@elte.hu>
To: Guillaume Chazarain <guichaz@yahoo.fr>
Cc: Thomas Gleixner <tglx@linutronix.de>,
Stefano Brivio <stefano.brivio@polimi.it>,
Robert Love <rml@tech9.net>,
linux-kernel@vger.kernel.org, Dave Jones <davej@redhat.com>,
"Rafael J. Wysocki" <rjw@sisk.pl>, Michael Buesch <mb@bu3sch.de>,
"Andrew Morton"@pimp.vs19.net
Subject: [patch] x86: scale cyc_2_nsec according to CPU frequency
Date: Fri, 7 Dec 2007 14:55:25 +0100 [thread overview]
Message-ID: <20071207135525.GA5588@elte.hu> (raw)
In-Reply-To: <20071207090252.1caf1509@inria.fr>
* Guillaume Chazarain <guichaz@yahoo.fr> wrote:
> > > Hmrpf. sched_clock() is used for the time stamp of the printks. We
> > > need to find some better solution other than killing off the tsc
> > > access completely.
> >
> > Something like http://lkml.org/lkml/2007/3/16/291 that would need
> > some refresh?
>
> And here is a refreshed one just for testing with 2.6-git. The 64 bit
> part is a shamelessly untested copy/paste as I cannot test it.
Guillaume, i've updated your patch with a handful of changes - see the
result below.
Firstly, we don't need the 'offset' anymore because cpu_clock() maintains
offsets itself. This simplifies the math and speeds up the sched_clock()
common case.
Secondly, with PER_CPU variables we need to update them for all possible
CPUs - otherwise they might end up with a zero scaling factor which is
not good. (not all CPUs are cpufreq capable)
Thirdly, we can be a bit smarter and faster by using the fact that
local_irq_disable() is preempt-safe - so we can use per_cpu() instead of
get_cpu_var().
Ingo
----------------->
Subject: x86: scale cyc_2_nsec according to CPU frequency
From: "Guillaume Chazarain" <guichaz@yahoo.fr>
scale the sched_clock() cyc_2_nsec scaling factor according to
CPU frequency changes.
[ mingo@elte.hu: simplified it and fixed it for SMP. ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/kernel/tsc_32.c | 41 +++++++++++++++++++++++++++-----
arch/x86/kernel/tsc_64.c | 59 +++++++++++++++++++++++++++++++++++++++--------
include/asm-x86/timer.h | 23 ++++++++++++++----
3 files changed, 102 insertions(+), 21 deletions(-)
Index: linux-x86.q/arch/x86/kernel/tsc_32.c
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/tsc_32.c
+++ linux-x86.q/arch/x86/kernel/tsc_32.c
@@ -5,6 +5,7 @@
#include <linux/jiffies.h>
#include <linux/init.h>
#include <linux/dmi.h>
+#include <linux/percpu.h>
#include <asm/delay.h>
#include <asm/tsc.h>
@@ -78,15 +79,31 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable);
* cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
* (mathieu.desnoyers@polymtl.ca)
*
+ * ns += offset to avoid sched_clock jumps with cpufreq
+ *
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
-unsigned long cyc2ns_scale __read_mostly;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+DEFINE_PER_CPU(unsigned long, cyc2ns);
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
+static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
{
- cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
+ unsigned long flags, prev_scale, *scale;
+ unsigned long long tsc_now, ns_now;
+
+ local_irq_save(flags);
+ scale = &per_cpu(cyc2ns, cpu);
+
+ rdtscll(tsc_now);
+ ns_now = __cycles_2_ns(tsc_now);
+
+ prev_scale = *scale;
+ if (cpu_khz)
+ *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
+
+ printk("CPU#%d: changed cyc2ns scale from %ld to %ld\n",
+ cpu, prev_scale, *scale);
+ local_irq_restore(flags);
}
/*
@@ -239,7 +256,9 @@ time_cpufreq_notifier(struct notifier_bl
ref_freq, freq->new);
if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
tsc_khz = cpu_khz;
- set_cyc2ns_scale(cpu_khz);
+ preempt_disable();
+ set_cyc2ns_scale(cpu_khz, smp_processor_id());
+ preempt_enable();
/*
* TSC based sched_clock turns
* to junk w/ cpufreq
@@ -367,6 +386,8 @@ static inline void check_geode_tsc_relia
void __init tsc_init(void)
{
+ int cpu;
+
if (!cpu_has_tsc || tsc_disable)
goto out_no_tsc;
@@ -380,7 +401,15 @@ void __init tsc_init(void)
(unsigned long)cpu_khz / 1000,
(unsigned long)cpu_khz % 1000);
- set_cyc2ns_scale(cpu_khz);
+ /*
+ * Secondary CPUs do not run through tsc_init(), so set up
+ * all the scale factors for all CPUs, assuming the same
+ * speed as the bootup CPU. (cpufreq notifiers will fix this
+ * up if their speed diverges)
+ */
+ for_each_possible_cpu(cpu)
+ set_cyc2ns_scale(cpu_khz, cpu);
+
use_tsc_delay();
/* Check and install the TSC clocksource */
Index: linux-x86.q/arch/x86/kernel/tsc_64.c
===================================================================
--- linux-x86.q.orig/arch/x86/kernel/tsc_64.c
+++ linux-x86.q/arch/x86/kernel/tsc_64.c
@@ -10,6 +10,7 @@
#include <asm/hpet.h>
#include <asm/timex.h>
+#include <asm/timer.h>
static int notsc __initdata = 0;
@@ -18,16 +19,50 @@ EXPORT_SYMBOL(cpu_khz);
unsigned int tsc_khz;
EXPORT_SYMBOL(tsc_khz);
-static unsigned int cyc2ns_scale __read_mostly;
+/* Accelerators for sched_clock()
+ * convert from cycles(64bits) => nanoseconds (64bits)
+ * basic equation:
+ * ns = cycles / (freq / ns_per_sec)
+ * ns = cycles * (ns_per_sec / freq)
+ * ns = cycles * (10^9 / (cpu_khz * 10^3))
+ * ns = cycles * (10^6 / cpu_khz)
+ *
+ * Then we use scaling math (suggested by george@mvista.com) to get:
+ * ns = cycles * (10^6 * SC / cpu_khz) / SC
+ * ns = cycles * cyc2ns_scale / SC
+ *
+ * And since SC is a constant power of two, we can convert the div
+ * into a shift.
+ *
+ * We can use khz divisor instead of mhz to keep a better precision, since
+ * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ * (mathieu.desnoyers@polymtl.ca)
+ *
+ * ns += offset to avoid sched_clock jumps with cpufreq
+ *
+ * -johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */
+DEFINE_PER_CPU(unsigned long, cyc2ns);
-static inline void set_cyc2ns_scale(unsigned long khz)
+static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
{
- cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz;
-}
+ unsigned long flags, prev_scale, *scale;
+ unsigned long long tsc_now, ns_now;
-static unsigned long long cycles_2_ns(unsigned long long cyc)
-{
- return (cyc * cyc2ns_scale) >> NS_SCALE;
+ local_irq_save(flags);
+ scale = &per_cpu(cyc2ns, cpu);
+
+ rdtscll(tsc_now);
+ ns_now = __cycles_2_ns(tsc_now);
+
+ prev_scale = *scale;
+ if (cpu_khz)
+ *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
+
+ printk("CPU#%d: changed cyc2ns scale from %ld to %ld\n",
+ cpu, prev_scale, *scale);
+
+ local_irq_restore(flags);
}
unsigned long long sched_clock(void)
@@ -100,7 +135,9 @@ static int time_cpufreq_notifier(struct
mark_tsc_unstable("cpufreq changes");
}
- set_cyc2ns_scale(tsc_khz_ref);
+ preempt_disable();
+ set_cyc2ns_scale(tsc_khz_ref, smp_processor_id());
+ preempt_enable();
return 0;
}
@@ -151,7 +188,7 @@ static unsigned long __init tsc_read_ref
void __init tsc_calibrate(void)
{
unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2;
- int hpet = is_hpet_enabled();
+ int hpet = is_hpet_enabled(), cpu;
local_irq_save(flags);
@@ -206,7 +243,9 @@ void __init tsc_calibrate(void)
}
tsc_khz = tsc2 / tsc1;
- set_cyc2ns_scale(tsc_khz);
+
+ for_each_possible_cpu(cpu)
+ set_cyc2ns_scale(tsc_khz, cpu);
}
/*
Index: linux-x86.q/include/asm-x86/timer.h
===================================================================
--- linux-x86.q.orig/include/asm-x86/timer.h
+++ linux-x86.q/include/asm-x86/timer.h
@@ -2,6 +2,7 @@
#define _ASMi386_TIMER_H
#include <linux/init.h>
#include <linux/pm.h>
+#include <linux/percpu.h>
#define TICK_SIZE (tick_nsec / 1000)
@@ -16,7 +17,7 @@ extern int recalibrate_cpu_khz(void);
#define calculate_cpu_khz() native_calculate_cpu_khz()
#endif
-/* Accellerators for sched_clock()
+/* Accelerators for sched_clock()
* convert from cycles(64bits) => nanoseconds (64bits)
* basic equation:
* ns = cycles / (freq / ns_per_sec)
@@ -31,20 +32,32 @@ extern int recalibrate_cpu_khz(void);
* And since SC is a constant power of two, we can convert the div
* into a shift.
*
- * We can use khz divisor instead of mhz to keep a better percision, since
+ * We can use khz divisor instead of mhz to keep a better precision, since
* cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
* (mathieu.desnoyers@polymtl.ca)
*
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
-extern unsigned long cyc2ns_scale __read_mostly;
+
+DECLARE_PER_CPU(unsigned long, cyc2ns);
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
{
- return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+ return cyc * per_cpu(cyc2ns, smp_processor_id()) >> CYC2NS_SCALE_FACTOR;
}
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+ unsigned long long ns;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ ns = __cycles_2_ns(cyc);
+ local_irq_restore(flags);
+
+ return ns;
+}
#endif
next prev parent reply other threads:[~2007-12-07 13:56 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-12-07 1:19 [PATCH] scheduler: fix x86 regression in native_sched_clock Stefano Brivio
2007-12-07 5:29 ` Nick Piggin
2007-12-07 5:51 ` Thomas Gleixner
2007-12-07 7:18 ` Guillaume Chazarain
2007-12-07 8:02 ` Guillaume Chazarain
2007-12-07 8:51 ` Ingo Molnar
2007-12-07 9:29 ` Guillaume Chazarain
2007-12-07 9:59 ` Ingo Molnar
2007-12-07 13:55 ` Ingo Molnar [this message]
2007-12-07 14:27 ` [patch] x86: scale cyc_2_nsec according to CPU frequency Guillaume Chazarain
2007-12-07 14:52 ` Ingo Molnar
2007-12-08 15:57 ` Arjan van de Ven
2007-12-08 19:16 ` Ingo Molnar
2007-12-08 20:18 ` Arjan van de Ven
2007-12-07 10:37 ` [PATCH] scheduler: fix x86 regression in native_sched_clock Andi Kleen
2007-12-07 8:45 ` Ingo Molnar
2007-12-07 10:32 ` Andrew Morton
2007-12-07 10:40 ` Ingo Molnar
2007-12-07 11:07 ` Ingo Molnar
2007-12-07 11:09 ` Andrew Morton
2007-12-07 11:12 ` Ingo Molnar
2007-12-07 11:13 ` Nick Piggin
2007-12-07 11:17 ` Ingo Molnar
2007-12-07 16:48 ` Nick Piggin
2007-12-08 0:50 ` Nick Piggin
2007-12-08 0:57 ` Nick Piggin
2007-12-08 8:52 ` Ingo Molnar
2007-12-08 23:37 ` Guillaume Chazarain
2007-12-12 4:42 ` Nick Piggin
2007-12-12 10:44 ` Ingo Molnar
2007-12-07 11:18 ` Guillaume Chazarain
2007-12-07 11:57 ` Guillaume Chazarain
2007-12-07 11:23 ` stefano.brivio
2007-12-07 12:11 ` Ingo Molnar
2007-12-07 12:25 ` Ingo Molnar
2007-12-07 12:35 ` Ingo Molnar
2007-12-07 12:40 ` Ingo Molnar
2007-12-07 14:54 ` Ingo Molnar
2007-12-07 16:46 ` Guillaume Chazarain
2007-12-07 17:57 ` Ingo Molnar
2007-12-08 15:06 ` Mark Lord
2007-12-08 15:13 ` Ingo Molnar
2007-12-08 15:27 ` Michael Buesch
2007-12-08 15:33 ` Ingo Molnar
2007-12-08 15:36 ` Michael Buesch
2007-12-08 15:41 ` Ingo Molnar
2007-12-07 11:24 ` Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20071207135525.GA5588@elte.hu \
--to=mingo@elte.hu \
--cc="Andrew Morton"@pimp.vs19.net \
--cc=davej@redhat.com \
--cc=guichaz@yahoo.fr \
--cc=linux-kernel@vger.kernel.org \
--cc=mb@bu3sch.de \
--cc=rjw@sisk.pl \
--cc=rml@tech9.net \
--cc=stefano.brivio@polimi.it \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.