From: Thomas Gleixner <tglx@linutronix.de>
To: Andrew Morton <akpm@osdl.org>
Cc: LKML <linux-kernel@vger.kernel.org>, Ingo Molnar <mingo@elte.hu>,
John Stultz <johnstul@us.ibm.com>,
Arjan van de Veen <arjan@infradead.org>,
Roman Zippel <zippel@linux-m68k.org>
Subject: [patch 11/46] x86: rewrite SMP TSC sync code
Date: Tue, 23 Jan 2007 22:01:06 -0000 [thread overview]
Message-ID: <20070123211204.388919000@localhost.localdomain> (raw)
In-Reply-To: 20070123211159.178138000@localhost.localdomain
[-- Attachment #1: x86-tsc-sync-rewrite.patch --]
[-- Type: text/plain, Size: 27495 bytes --]
From: Ingo Molnar <mingo@elte.hu>
make the TSC synchronization code more robust, and unify
it between x86_64 and i386.
The biggest change is the removal of the 'fix up TSCs' code
on x86_64 and i386, in some rare cases it was /causing/
time-warps on SMP systems.
The new code only checks for TSC asynchronity - and if it can
prove a time-warp (if it can observe the TSC going backwards
when going from one CPU to another within a critical section),
then the TSC clock-source is turned off.
The TSC synchronization-checking code also got moved into a
separate file.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/i386/kernel/Makefile | 2
arch/i386/kernel/smpboot.c | 178 ++------------------------------
arch/i386/kernel/tsc.c | 4
arch/i386/kernel/tsc_sync.c | 1
arch/x86_64/kernel/Makefile | 2
arch/x86_64/kernel/smpboot.c | 230 ++----------------------------------------
arch/x86_64/kernel/time.c | 11 ++
arch/x86_64/kernel/tsc_sync.c | 187 ++++++++++++++++++++++++++++++++++
include/asm-i386/tsc.h | 49 --------
include/asm-x86_64/proto.h | 2
include/asm-x86_64/timex.h | 26 ----
include/asm-x86_64/tsc.h | 66 ++++++++++++
12 files changed, 295 insertions(+), 463 deletions(-)
Index: linux-2.6.20-rc4-mm1-bo/arch/i386/kernel/Makefile
===================================================================
--- linux-2.6.20-rc4-mm1-bo.orig/arch/i386/kernel/Makefile
+++ linux-2.6.20-rc4-mm1-bo/arch/i386/kernel/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_APM) += apm.o
-obj-$(CONFIG_X86_SMP) += smp.o smpboot.o
+obj-$(CONFIG_X86_SMP) += smp.o smpboot.o tsc_sync.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
Index: linux-2.6.20-rc4-mm1-bo/arch/i386/kernel/smpboot.c
===================================================================
--- linux-2.6.20-rc4-mm1-bo.orig/arch/i386/kernel/smpboot.c
+++ linux-2.6.20-rc4-mm1-bo/arch/i386/kernel/smpboot.c
@@ -93,12 +93,6 @@ cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);
static cpumask_t smp_commenced_mask;
-/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there
- * is no way to resync one AP against BP. TBD: for prescott and above, we
- * should use IA64's algorithm
- */
-static int __devinitdata tsc_sync_disabled;
-
/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_data);
@@ -215,151 +209,6 @@ valid_k7:
;
}
-/*
- * TSC synchronization.
- *
- * We first check whether all CPUs have their TSC's synchronized,
- * then we print a warning if not, and always resync.
- */
-
-static struct {
- atomic_t start_flag;
- atomic_t count_start;
- atomic_t count_stop;
- unsigned long long values[NR_CPUS];
-} tsc __cpuinitdata = {
- .start_flag = ATOMIC_INIT(0),
- .count_start = ATOMIC_INIT(0),
- .count_stop = ATOMIC_INIT(0),
-};
-
-#define NR_LOOPS 5
-
-static void __init synchronize_tsc_bp(void)
-{
- int i;
- unsigned long long t0;
- unsigned long long sum, avg;
- long long delta;
- unsigned int one_usec;
- int buggy = 0;
-
- printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
-
- /* convert from kcyc/sec to cyc/usec */
- one_usec = cpu_khz / 1000;
-
- atomic_set(&tsc.start_flag, 1);
- wmb();
-
- /*
- * We loop a few times to get a primed instruction cache,
- * then the last pass is more or less synchronized and
- * the BP and APs set their cycle counters to zero all at
- * once. This reduces the chance of having random offsets
- * between the processors, and guarantees that the maximum
- * delay between the cycle counters is never bigger than
- * the latency of information-passing (cachelines) between
- * two CPUs.
- */
- for (i = 0; i < NR_LOOPS; i++) {
- /*
- * all APs synchronize but they loop on '== num_cpus'
- */
- while (atomic_read(&tsc.count_start) != num_booting_cpus()-1)
- cpu_relax();
- atomic_set(&tsc.count_stop, 0);
- wmb();
- /*
- * this lets the APs save their current TSC:
- */
- atomic_inc(&tsc.count_start);
-
- rdtscll(tsc.values[smp_processor_id()]);
- /*
- * We clear the TSC in the last loop:
- */
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
-
- /*
- * Wait for all APs to leave the synchronization point:
- */
- while (atomic_read(&tsc.count_stop) != num_booting_cpus()-1)
- cpu_relax();
- atomic_set(&tsc.count_start, 0);
- wmb();
- atomic_inc(&tsc.count_stop);
- }
-
- sum = 0;
- for (i = 0; i < NR_CPUS; i++) {
- if (cpu_isset(i, cpu_callout_map)) {
- t0 = tsc.values[i];
- sum += t0;
- }
- }
- avg = sum;
- do_div(avg, num_booting_cpus());
-
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- delta = tsc.values[i] - avg;
- if (delta < 0)
- delta = -delta;
- /*
- * We report bigger than 2 microseconds clock differences.
- */
- if (delta > 2*one_usec) {
- long long realdelta;
-
- if (!buggy) {
- buggy = 1;
- printk("\n");
- }
- realdelta = delta;
- do_div(realdelta, one_usec);
- if (tsc.values[i] < avg)
- realdelta = -realdelta;
-
- if (realdelta)
- printk(KERN_INFO "CPU#%d had %Ld usecs TSC "
- "skew, fixed it up.\n", i, realdelta);
- }
- }
- if (!buggy)
- printk("passed.\n");
-}
-
-static void __cpuinit synchronize_tsc_ap(void)
-{
- int i;
-
- /*
- * Not every cpu is online at the time
- * this gets called, so we first wait for the BP to
- * finish SMP initialization:
- */
- while (!atomic_read(&tsc.start_flag))
- cpu_relax();
-
- for (i = 0; i < NR_LOOPS; i++) {
- atomic_inc(&tsc.count_start);
- while (atomic_read(&tsc.count_start) != num_booting_cpus())
- cpu_relax();
-
- rdtscll(tsc.values[smp_processor_id()]);
- if (i == NR_LOOPS-1)
- write_tsc(0, 0);
-
- atomic_inc(&tsc.count_stop);
- while (atomic_read(&tsc.count_stop) != num_booting_cpus())
- cpu_relax();
- }
-}
-#undef NR_LOOPS
-
extern void calibrate_delay(void);
static atomic_t init_deasserted;
@@ -445,12 +294,6 @@ static void __cpuinit smp_callin(void)
* Allow the master to continue.
*/
cpu_set(cpuid, cpu_callin_map);
-
- /*
- * Synchronize the TSC with the BP
- */
- if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
- synchronize_tsc_ap();
}
static int cpucount;
@@ -553,6 +396,11 @@ static void __cpuinit start_secondary(vo
smp_callin();
while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
rep_nop();
+ /*
+ * Check TSC synchronization with the BP:
+ */
+ check_tsc_sync_target();
+
setup_secondary_clock();
if (nmi_watchdog == NMI_IO_APIC) {
disable_8259A_irq(0);
@@ -1122,8 +970,6 @@ static int __cpuinit __smp_prepare_cpu(i
info.cpu = cpu;
INIT_WORK(&info.task, do_warm_boot_cpu);
- tsc_sync_disabled = 1;
-
/* init low mem mapping */
clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
@@ -1131,7 +977,6 @@ static int __cpuinit __smp_prepare_cpu(i
schedule_work(&info.task);
wait_for_completion(&done);
- tsc_sync_disabled = 0;
zap_low_mappings();
ret = 0;
exit:
@@ -1328,12 +1173,6 @@ static void __init smp_boot_cpus(unsigne
smpboot_setup_io_apic();
setup_boot_clock();
-
- /*
- * Synchronize the TSC with the AP
- */
- if (cpu_has_tsc && cpucount && cpu_khz)
- synchronize_tsc_bp();
}
/* These are wrappers to interface to the new boot process. Someone
@@ -1468,9 +1307,16 @@ int __cpuinit __cpu_up(unsigned int cpu)
}
local_irq_enable();
+
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
/* Unleash the CPU! */
cpu_set(cpu, smp_commenced_mask);
+
+ /*
+ * Check TSC synchronization with the AP:
+ */
+ check_tsc_sync_source(cpu);
+
while (!cpu_isset(cpu, cpu_online_map))
cpu_relax();
return 0;
Index: linux-2.6.20-rc4-mm1-bo/arch/i386/kernel/tsc.c
===================================================================
--- linux-2.6.20-rc4-mm1-bo.orig/arch/i386/kernel/tsc.c
+++ linux-2.6.20-rc4-mm1-bo/arch/i386/kernel/tsc.c
@@ -407,8 +407,10 @@ out:
* Make an educated guess if the TSC is trustworthy and synchronized
* over all CPUs.
*/
-static __init int unsynchronized_tsc(void)
+__cpuinit int unsynchronized_tsc(void)
{
+ if (!cpu_has_tsc || tsc_unstable)
+ return 1;
/*
* Intel systems are normally all synchronized.
* Exceptions must mark TSC as unstable:
Index: linux-2.6.20-rc4-mm1-bo/arch/i386/kernel/tsc_sync.c
===================================================================
--- /dev/null
+++ linux-2.6.20-rc4-mm1-bo/arch/i386/kernel/tsc_sync.c
@@ -0,0 +1 @@
+#include "../../x86_64/kernel/tsc_sync.c"
Index: linux-2.6.20-rc4-mm1-bo/arch/x86_64/kernel/Makefile
===================================================================
--- linux-2.6.20-rc4-mm1-bo.orig/arch/x86_64/kernel/Makefile
+++ linux-2.6.20-rc4-mm1-bo/arch/x86_64/kernel/Makefile
@@ -19,7 +19,7 @@ obj-$(CONFIG_ACPI) += acpi/
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
-obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o
+obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o tsc_sync.o
obj-y += apic.o nmi.o
obj-y += io_apic.o mpparse.o genapic.o genapic_flat.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
Index: linux-2.6.20-rc4-mm1-bo/arch/x86_64/kernel/smpboot.c
===================================================================
--- linux-2.6.20-rc4-mm1-bo.orig/arch/x86_64/kernel/smpboot.c
+++ linux-2.6.20-rc4-mm1-bo/arch/x86_64/kernel/smpboot.c
@@ -147,217 +147,6 @@ static void __cpuinit smp_store_cpu_info
print_cpu_info(c);
}
-/*
- * New Funky TSC sync algorithm borrowed from IA64.
- * Main advantage is that it doesn't reset the TSCs fully and
- * in general looks more robust and it works better than my earlier
- * attempts. I believe it was written by David Mosberger. Some minor
- * adjustments for x86-64 by me -AK
- *
- * Original comment reproduced below.
- *
- * Synchronize TSC of the current (slave) CPU with the TSC of the
- * MASTER CPU (normally the time-keeper CPU). We use a closed loop to
- * eliminate the possibility of unaccounted-for errors (such as
- * getting a machine check in the middle of a calibration step). The
- * basic idea is for the slave to ask the master what itc value it has
- * and to read its own itc before and after the master responds. Each
- * iteration gives us three timestamps:
- *
- * slave master
- *
- * t0 ---\
- * ---\
- * --->
- * tm
- * /---
- * /---
- * t1 <---
- *
- *
- * The goal is to adjust the slave's TSC such that tm falls exactly
- * half-way between t0 and t1. If we achieve this, the clocks are
- * synchronized provided the interconnect between the slave and the
- * master is symmetric. Even if the interconnect were asymmetric, we
- * would still know that the synchronization error is smaller than the
- * roundtrip latency (t0 - t1).
- *
- * When the interconnect is quiet and symmetric, this lets us
- * synchronize the TSC to within one or two cycles. However, we can
- * only *guarantee* that the synchronization is accurate to within a
- * round-trip time, which is typically in the range of several hundred
- * cycles (e.g., ~500 cycles). In practice, this means that the TSCs
- * are usually almost perfectly synchronized, but we shouldn't assume
- * that the accuracy is much better than half a micro second or so.
- *
- * [there are other errors like the latency of RDTSC and of the
- * WRMSR. These can also account to hundreds of cycles. So it's
- * probably worse. It claims 153 cycles error on a dual Opteron,
- * but I suspect the numbers are actually somewhat worse -AK]
- */
-
-#define MASTER 0
-#define SLAVE (SMP_CACHE_BYTES/8)
-
-/* Intentionally don't use cpu_relax() while TSC synchronization
- because we don't want to go into funky power save modi or cause
- hypervisors to schedule us away. Going to sleep would likely affect
- latency and low latency is the primary objective here. -AK */
-#define no_cpu_relax() barrier()
-
-static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
-static volatile __cpuinitdata unsigned long go[SLAVE + 1];
-static int notscsync __cpuinitdata;
-
-#undef DEBUG_TSC_SYNC
-
-#define NUM_ROUNDS 64 /* magic value */
-#define NUM_ITERS 5 /* likewise */
-
-/* Callback on boot CPU */
-static __cpuinit void sync_master(void *arg)
-{
- unsigned long flags, i;
-
- go[MASTER] = 0;
-
- local_irq_save(flags);
- {
- for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
- while (!go[MASTER])
- no_cpu_relax();
- go[MASTER] = 0;
- rdtscll(go[SLAVE]);
- }
- }
- local_irq_restore(flags);
-}
-
-/*
- * Return the number of cycles by which our tsc differs from the tsc
- * on the master (time-keeper) CPU. A positive number indicates our
- * tsc is ahead of the master, negative that it is behind.
- */
-static inline long
-get_delta(long *rt, long *master)
-{
- unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
- unsigned long tcenter, t0, t1, tm;
- int i;
-
- for (i = 0; i < NUM_ITERS; ++i) {
- rdtscll(t0);
- go[MASTER] = 1;
- while (!(tm = go[SLAVE]))
- no_cpu_relax();
- go[SLAVE] = 0;
- rdtscll(t1);
-
- if (t1 - t0 < best_t1 - best_t0)
- best_t0 = t0, best_t1 = t1, best_tm = tm;
- }
-
- *rt = best_t1 - best_t0;
- *master = best_tm - best_t0;
-
- /* average best_t0 and best_t1 without overflow: */
- tcenter = (best_t0/2 + best_t1/2);
- if (best_t0 % 2 + best_t1 % 2 == 2)
- ++tcenter;
- return tcenter - best_tm;
-}
-
-static __cpuinit void sync_tsc(unsigned int master)
-{
- int i, done = 0;
- long delta, adj, adjust_latency = 0;
- unsigned long flags, rt, master_time_stamp, bound;
-#ifdef DEBUG_TSC_SYNC
- static struct syncdebug {
- long rt; /* roundtrip time */
- long master; /* master's timestamp */
- long diff; /* difference between midpoint and master's timestamp */
- long lat; /* estimate of tsc adjustment latency */
- } t[NUM_ROUNDS] __cpuinitdata;
-#endif
-
- printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n",
- smp_processor_id(), master);
-
- go[MASTER] = 1;
-
- /* It is dangerous to broadcast IPI as cpus are coming up,
- * as they may not be ready to accept them. So since
- * we only need to send the ipi to the boot cpu direct
- * the message, and avoid the race.
- */
- smp_call_function_single(master, sync_master, NULL, 1, 0);
-
- while (go[MASTER]) /* wait for master to be ready */
- no_cpu_relax();
-
- spin_lock_irqsave(&tsc_sync_lock, flags);
- {
- for (i = 0; i < NUM_ROUNDS; ++i) {
- delta = get_delta(&rt, &master_time_stamp);
- if (delta == 0) {
- done = 1; /* let's lock on to this... */
- bound = rt;
- }
-
- if (!done) {
- unsigned long t;
- if (i > 0) {
- adjust_latency += -delta;
- adj = -delta + adjust_latency/4;
- } else
- adj = -delta;
-
- rdtscll(t);
- wrmsrl(MSR_IA32_TSC, t + adj);
- }
-#ifdef DEBUG_TSC_SYNC
- t[i].rt = rt;
- t[i].master = master_time_stamp;
- t[i].diff = delta;
- t[i].lat = adjust_latency/4;
-#endif
- }
- }
- spin_unlock_irqrestore(&tsc_sync_lock, flags);
-
-#ifdef DEBUG_TSC_SYNC
- for (i = 0; i < NUM_ROUNDS; ++i)
- printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
- t[i].rt, t[i].master, t[i].diff, t[i].lat);
-#endif
-
- printk(KERN_INFO
- "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
- "maxerr %lu cycles)\n",
- smp_processor_id(), master, delta, rt);
-}
-
-static void __cpuinit tsc_sync_wait(void)
-{
- /*
- * When the CPU has synchronized TSCs assume the BIOS
- * or the hardware already synced. Otherwise we could
- * mess up a possible perfect synchronization with a
- * not-quite-perfect algorithm.
- */
- if (notscsync || !cpu_has_tsc || !unsynchronized_tsc())
- return;
- sync_tsc(0);
-}
-
-static __init int notscsync_setup(char *s)
-{
- notscsync = 1;
- return 1;
-}
-__setup("notscsync", notscsync_setup);
-
static atomic_t init_deasserted __cpuinitdata;
/*
@@ -545,6 +334,11 @@ void __cpuinit start_secondary(void)
/* otherwise gcc will move up the smp_processor_id before the cpu_init */
barrier();
+ /*
+ * Check TSC sync first:
+ */
+ check_tsc_sync_target();
+
Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
setup_secondary_APIC_clock();
@@ -564,14 +358,6 @@ void __cpuinit start_secondary(void)
*/
set_cpu_sibling_map(smp_processor_id());
- /*
- * Wait for TSC sync to not schedule things before.
- * We still process interrupts, which could see an inconsistent
- * time in that window unfortunately.
- * Do this here because TSC sync has global unprotected state.
- */
- tsc_sync_wait();
-
/*
* We need to hold call_lock, so there is no inconsistency
* between the time smp_call_function() determines number of
@@ -591,6 +377,7 @@ void __cpuinit start_secondary(void)
cpu_set(smp_processor_id(), cpu_online_map);
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
spin_unlock(&vector_lock);
+
unlock_ipi_call_lock();
cpu_idle();
@@ -1166,6 +953,11 @@ int __cpuinit __cpu_up(unsigned int cpu)
/* Unleash the CPU! */
Dprintk("waiting for cpu %d\n", cpu);
+ /*
+ * Make sure and check TSC sync:
+ */
+ check_tsc_sync_source(cpu);
+
while (!cpu_isset(cpu, cpu_online_map))
cpu_relax();
err = 0;
Index: linux-2.6.20-rc4-mm1-bo/arch/x86_64/kernel/time.c
===================================================================
--- linux-2.6.20-rc4-mm1-bo.orig/arch/x86_64/kernel/time.c
+++ linux-2.6.20-rc4-mm1-bo/arch/x86_64/kernel/time.c
@@ -939,12 +939,23 @@ void __init time_init(void)
#endif
}
+static int tsc_unstable = 0;
+
+void mark_tsc_unstable(void)
+{
+ tsc_unstable = 1;
+}
+EXPORT_SYMBOL_GPL(mark_tsc_unstable);
+
/*
* Make an educated guess if the TSC is trustworthy and synchronized
* over all CPUs.
*/
__cpuinit int unsynchronized_tsc(void)
{
+ if (tsc_unstable)
+ return 1;
+
#ifdef CONFIG_SMP
if (apic_is_clustered_box())
return 1;
Index: linux-2.6.20-rc4-mm1-bo/arch/x86_64/kernel/tsc_sync.c
===================================================================
--- /dev/null
+++ linux-2.6.20-rc4-mm1-bo/arch/x86_64/kernel/tsc_sync.c
@@ -0,0 +1,187 @@
+/*
+ * arch/x86_64/kernel/tsc_sync.c: check TSC synchronization.
+ *
+ * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar
+ *
+ * We check whether all boot CPUs have their TSC's synchronized,
+ * print a warning if not and turn off the TSC clock-source.
+ *
+ * The warp-check is point-to-point between two CPUs, the CPU
+ * initiating the bootup is the 'source CPU', the freshly booting
+ * CPU is the 'target CPU'.
+ *
+ * Only two CPUs may participate - they can enter in any order.
+ * ( The serial nature of the boot logic and the CPU hotplug lock
+ * protects against more than 2 CPUs entering this code. )
+ */
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/nmi.h>
+#include <asm/tsc.h>
+
+/*
+ * Entry/exit counters that make sure that both CPUs
+ * run the measurement code at once:
+ */
+static __cpuinitdata atomic_t start_count;
+static __cpuinitdata atomic_t stop_count;
+
+/*
+ * We use a raw spinlock in this exceptional case, because
+ * we want to have the fastest, inlined, non-debug version
+ * of a critical section, to be able to prove TSC time-warps:
+ */
+static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED;
+static __cpuinitdata cycles_t last_tsc;
+static __cpuinitdata cycles_t max_warp;
+static __cpuinitdata int nr_warps;
+
+/*
+ * TSC-warp measurement loop running on both CPUs:
+ */
+static __cpuinit void check_tsc_warp(void)
+{
+ cycles_t start, now, prev, end;
+ int i;
+
+ start = get_cycles_sync();
+ /*
+ * The measurement runs for 20 msecs:
+ */
+ end = start + cpu_khz * 20ULL;
+ now = start;
+
+ for (i = 0; ; i++) {
+ /*
+ * We take the global lock, measure TSC, save the
+ * previous TSC that was measured (possibly on
+ * another CPU) and update the previous TSC timestamp.
+ */
+ __raw_spin_lock(&sync_lock);
+ prev = last_tsc;
+ now = get_cycles_sync();
+ last_tsc = now;
+ __raw_spin_unlock(&sync_lock);
+
+ /*
+ * Be nice every now and then (and also check whether
+ * measurement is done [we also insert a 100 million
+ * loops safety exit, so we dont lock up in case the
+ * TSC readout is totally broken]):
+ */
+ if (unlikely(!(i & 7))) {
+ if (now > end || i > 100000000)
+ break;
+ cpu_relax();
+ touch_nmi_watchdog();
+ }
+ /*
+ * Outside the critical section we can now see whether
+ * we saw a time-warp of the TSC going backwards:
+ */
+ if (unlikely(prev > now)) {
+ __raw_spin_lock(&sync_lock);
+ max_warp = max(max_warp, prev - now);
+ nr_warps++;
+ __raw_spin_unlock(&sync_lock);
+ }
+
+ }
+}
+
+/*
+ * Source CPU calls into this - it waits for the freshly booted
+ * target CPU to arrive and then starts the measurement:
+ */
+void __cpuinit check_tsc_sync_source(int cpu)
+{
+ int cpus = 2;
+
+ /*
+ * No need to check if we already know that the TSC is not
+ * synchronized:
+ */
+ if (unsynchronized_tsc())
+ return;
+
+ printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:",
+ smp_processor_id(), cpu);
+
+ /*
+ * Reset it - in case this is a second bootup:
+ */
+ atomic_set(&stop_count, 0);
+
+ /*
+ * Wait for the target to arrive:
+ */
+ while (atomic_read(&start_count) != cpus-1)
+ cpu_relax();
+ /*
+ * Trigger the target to continue into the measurement too:
+ */
+ atomic_inc(&start_count);
+
+ check_tsc_warp();
+
+ while (atomic_read(&stop_count) != cpus-1)
+ cpu_relax();
+
+ /*
+ * Reset it - just in case we boot another CPU later:
+ */
+ atomic_set(&start_count, 0);
+
+ if (nr_warps) {
+ printk("\n");
+ printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs,"
+ " turning off TSC clock.\n", max_warp);
+ mark_tsc_unstable();
+ nr_warps = 0;
+ max_warp = 0;
+ last_tsc = 0;
+ } else {
+ printk(" passed.\n");
+ }
+
+ /*
+ * Let the target continue with the bootup:
+ */
+ atomic_inc(&stop_count);
+}
+
+/*
+ * Freshly booted CPUs call into this:
+ */
+void __cpuinit check_tsc_sync_target(void)
+{
+ int cpus = 2;
+
+ if (unsynchronized_tsc())
+ return;
+
+ /*
+ * Register this CPU's participation and wait for the
+ * source CPU to start the measurement:
+ */
+ atomic_inc(&start_count);
+ while (atomic_read(&start_count) != cpus)
+ cpu_relax();
+
+ check_tsc_warp();
+
+ /*
+ * Ok, we are done:
+ */
+ atomic_inc(&stop_count);
+
+ /*
+ * Wait for the source CPU to print stuff:
+ */
+ while (atomic_read(&stop_count) != cpus)
+ cpu_relax();
+}
+#undef NR_LOOPS
+
Index: linux-2.6.20-rc4-mm1-bo/include/asm-i386/tsc.h
===================================================================
--- linux-2.6.20-rc4-mm1-bo.orig/include/asm-i386/tsc.h
+++ linux-2.6.20-rc4-mm1-bo/include/asm-i386/tsc.h
@@ -1,48 +1 @@
-/*
- * linux/include/asm-i386/tsc.h
- *
- * i386 TSC related functions
- */
-#ifndef _ASM_i386_TSC_H
-#define _ASM_i386_TSC_H
-
-#include <asm/processor.h>
-
-/*
- * Standard way to access the cycle counter on i586+ CPUs.
- * Currently only used on SMP.
- *
- * If you really have a SMP machine with i486 chips or older,
- * compile for that, and this will just always return zero.
- * That's ok, it just means that the nicer scheduling heuristics
- * won't work for you.
- *
- * We only use the low 32 bits, and we'd simply better make sure
- * that we reschedule before that wraps. Scheduling at least every
- * four billion cycles just basically sounds like a good idea,
- * regardless of how fast the machine is.
- */
-typedef unsigned long long cycles_t;
-
-extern unsigned int cpu_khz;
-extern unsigned int tsc_khz;
-
-static inline cycles_t get_cycles(void)
-{
- unsigned long long ret = 0;
-
-#ifndef CONFIG_X86_TSC
- if (!cpu_has_tsc)
- return 0;
-#endif
-
-#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
- rdtscll(ret);
-#endif
- return ret;
-}
-
-extern void tsc_init(void);
-extern void mark_tsc_unstable(void);
-
-#endif
+#include <asm-x86_64/tsc.h>
Index: linux-2.6.20-rc4-mm1-bo/include/asm-x86_64/proto.h
===================================================================
--- linux-2.6.20-rc4-mm1-bo.orig/include/asm-x86_64/proto.h
+++ linux-2.6.20-rc4-mm1-bo/include/asm-x86_64/proto.h
@@ -91,8 +91,6 @@ extern void check_efer(void);
extern int unhandled_signal(struct task_struct *tsk, int sig);
-extern int unsynchronized_tsc(void);
-
extern void select_idle_routine(const struct cpuinfo_x86 *c);
extern unsigned long table_start, table_end;
Index: linux-2.6.20-rc4-mm1-bo/include/asm-x86_64/timex.h
===================================================================
--- linux-2.6.20-rc4-mm1-bo.orig/include/asm-x86_64/timex.h
+++ linux-2.6.20-rc4-mm1-bo/include/asm-x86_64/timex.h
@@ -12,35 +12,11 @@
#include <asm/hpet.h>
#include <asm/system.h>
#include <asm/processor.h>
+#include <asm/tsc.h>
#include <linux/compiler.h>
#define CLOCK_TICK_RATE PIT_TICK_RATE /* Underlying HZ */
-typedef unsigned long long cycles_t;
-
-static inline cycles_t get_cycles (void)
-{
- unsigned long long ret;
-
- rdtscll(ret);
- return ret;
-}
-
-/* Like get_cycles, but make sure the CPU is synchronized. */
-static __always_inline cycles_t get_cycles_sync(void)
-{
- unsigned long long ret;
- unsigned eax;
- /* Don't do an additional sync on CPUs where we know
- RDTSC is already synchronous. */
- alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC,
- "=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
- rdtscll(ret);
- return ret;
-}
-
-extern unsigned int cpu_khz;
-
extern int read_current_timer(unsigned long *timer_value);
#define ARCH_HAS_READ_CURRENT_TIMER 1
Index: linux-2.6.20-rc4-mm1-bo/include/asm-x86_64/tsc.h
===================================================================
--- /dev/null
+++ linux-2.6.20-rc4-mm1-bo/include/asm-x86_64/tsc.h
@@ -0,0 +1,66 @@
+/*
+ * linux/include/asm-x86_64/tsc.h
+ *
+ * x86_64 TSC related functions
+ */
+#ifndef _ASM_x86_64_TSC_H
+#define _ASM_x86_64_TSC_H
+
+#include <asm/processor.h>
+
+/*
+ * Standard way to access the cycle counter.
+ */
+typedef unsigned long long cycles_t;
+
+extern unsigned int cpu_khz;
+extern unsigned int tsc_khz;
+
+static inline cycles_t get_cycles(void)
+{
+ unsigned long long ret = 0;
+
+#ifndef CONFIG_X86_TSC
+ if (!cpu_has_tsc)
+ return 0;
+#endif
+
+#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
+ rdtscll(ret);
+#endif
+ return ret;
+}
+
+/* Like get_cycles, but make sure the CPU is synchronized. */
+static __always_inline cycles_t get_cycles_sync(void)
+{
+ unsigned long long ret;
+#ifdef X86_FEATURE_SYNC_RDTSC
+ unsigned eax;
+
+ /*
+ * Don't do an additional sync on CPUs where we know
+ * RDTSC is already synchronous:
+ */
+ alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC,
+ "=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
+#else
+ sync_core();
+#endif
+ rdtscll(ret);
+
+ return ret;
+}
+
+extern void tsc_init(void);
+extern void mark_tsc_unstable(void);
+extern int unsynchronized_tsc(void);
+
+/*
+ * Boot-time check whether the TSCs are synchronized across
+ * all CPUs/cores:
+ */
+extern void check_tsc_sync_source(int cpu);
+extern void check_tsc_sync_target(void);
+
+#endif
--
next prev parent reply other threads:[~2007-01-23 22:14 UTC|newest]
Thread overview: 78+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-01-23 22:00 [patch 00/46] High resolution timer / dynamic tick update Thomas Gleixner
2007-01-23 22:00 ` [patch 01/46] Add irq flag to disable balancing for an interrupt Thomas Gleixner
2007-01-23 22:00 ` [patch 02/46] Add a functions to handle interrupt affinity setting Thomas Gleixner
2007-01-23 22:00 ` [patch 03/46] [RFC] HZ free ntp Thomas Gleixner
2007-01-23 22:00 ` [patch 04/46] Uninline jiffies.h functions Thomas Gleixner
2007-01-23 22:01 ` [patch 05/46] Thomas Gleixner
2007-01-23 22:01 ` [patch 06/46] Fix timeout overflow with jiffies Thomas Gleixner
2007-01-23 22:01 ` [patch 07/46] GTOD: persistent clock support Thomas Gleixner
2007-01-23 22:01 ` [patch 08/46] i386: use GTOD " Thomas Gleixner
2007-01-23 22:01 ` [patch 09/46] i386 Remove useless code in tsc.c Thomas Gleixner
2007-01-23 22:01 ` [patch 10/46] Simplify the registration of clocksources Thomas Gleixner
2007-01-23 22:01 ` Thomas Gleixner [this message]
2007-01-23 22:01 ` [patch 12/46] clocksource: replace is_continuous by a flag field Thomas Gleixner
2007-01-24 11:23 ` [patch] clocksource: fixup is_continous changes in vmitime.c Ingo Molnar
2007-01-24 11:53 ` Thomas Gleixner
2007-01-23 22:01 ` [patch 13/46] clocksource: fixup is_continous changes on ARM Thomas Gleixner
2007-01-23 22:01 ` [patch 14/46] clocksource: fixup is_continous changes on AVR32 Thomas Gleixner
2007-01-23 22:01 ` [patch 15/46] clocksource: fixup is_continous changes on S390 Thomas Gleixner
2007-01-23 22:01 ` [patch 16/46] clocksource: fixup is_continous changes on MIPS Thomas Gleixner
2007-01-23 22:01 ` [patch 17/46] clocksource: Remove the update callback Thomas Gleixner
2007-01-23 22:01 ` [patch 18/46] clocksource: Add verification (watchdog) helper Thomas Gleixner
2007-01-24 15:42 ` [patch] clocksource: add verification (watchdog) helper, fix Ingo Molnar
2007-01-23 22:01 ` [patch 19/46] Mark TSC on GeodeLX reliable Thomas Gleixner
2007-01-23 22:01 ` [patch 20/46] uninline irq_enter() Thomas Gleixner
2007-01-23 22:01 ` [patch 21/46] Fix cascade lookup of next_timer_interrupt Thomas Gleixner
2007-01-23 22:01 ` [patch 22/46] Extend next_timer_interrupt() to use a reference jiffie Thomas Gleixner
2007-01-23 22:01 ` [patch 23/46] hrtimers: namespace and enum cleanup Thomas Gleixner
2007-01-23 22:01 ` [patch 24/46] hrtimers: namespace and enum cleanup vs. git-input Thomas Gleixner
2007-01-23 22:01 ` [patch 25/46] hrtimers: cleanup locking Thomas Gleixner
2007-01-23 22:01 ` [patch 26/46] hrtimers; add state tracking Thomas Gleixner
2007-01-23 22:01 ` [patch 27/46] hrtimers: clean up callback tracking Thomas Gleixner
2007-01-23 22:01 ` [patch 28/46] hrtimers: move and add documentation Thomas Gleixner
2007-01-23 22:01 ` [patch 29/46] ACPI: fix missing include for UP Thomas Gleixner
2007-01-23 22:01 ` [patch 30/46] ACPI keep track of timer broadcasting Thomas Gleixner
2007-01-23 22:01 ` [patch 31/46] Allow early access to the power management timer Thomas Gleixner
2007-01-23 22:01 ` [patch 32/46] i386, apic: clean up the APIC code Thomas Gleixner
2007-01-23 22:01 ` [patch 33/46] clockevents: add core functionality Thomas Gleixner
2007-01-23 22:01 ` [patch 34/46] tick-management: " Thomas Gleixner
2007-01-23 22:01 ` [patch 35/46] tick-management: broadcast functionality Thomas Gleixner
2007-01-23 22:01 ` [patch 36/46] tick-management: dyntick / highres functionality Thomas Gleixner
2007-01-28 2:03 ` [PATCH] high_res_timers: precisely update_process_times; " Karsten Wiese
2007-01-23 22:01 ` [patch 37/46] clockevents: i383 drivers Thomas Gleixner
2007-01-23 22:01 ` [patch 38/46] i386 rework local apic timer calibration Thomas Gleixner
2007-01-23 22:01 ` [patch 39/46] i386 prepare for dyntick Thomas Gleixner
2007-01-23 22:01 ` [patch 40/46] i386 prepare nmi watchdog for dynticks Thomas Gleixner
2007-01-23 22:01 ` [patch 41/46] i386: enable dynticks in kconfig Thomas Gleixner
2007-01-23 22:01 ` [patch 42/46] hrtimers: add high resolution timer support Thomas Gleixner
2007-01-23 22:01 ` [patch 43/46] hrtimers: prevent possible itimer DoS Thomas Gleixner
2007-01-23 22:01 ` [patch 44/46] Add debugging feature /proc/timer_stat Thomas Gleixner
2007-01-23 22:01 ` [patch 45/46] Add debugging feature /proc/timer_list Thomas Gleixner
2007-01-23 22:01 ` [patch 46/46] Add SysRq-Q to print timer_list debug info Thomas Gleixner
2007-01-24 2:16 ` [patch 00/46] High resolution timer / dynamic tick update Daniel Walker
2007-01-24 2:23 ` Andrew Morton
2007-01-24 3:25 ` Daniel Walker
2007-01-24 7:07 ` Ingo Molnar
2007-01-24 9:30 ` Daniel Walker
2007-01-24 9:51 ` Ingo Molnar
2007-01-24 10:23 ` Daniel Walker
2007-01-24 10:29 ` Ingo Molnar
2007-01-24 10:53 ` Daniel Walker
2007-01-24 11:04 ` Ingo Molnar
2007-01-24 11:13 ` Thomas Gleixner
2007-01-24 15:53 ` Daniel Walker
[not found] ` <20070124160046.GA24798@elte.hu>
2007-01-24 17:21 ` Daniel Walker
[not found] ` <1169655076.19471.241.camel@imap.mvista.com>
2007-01-24 19:38 ` Ingo Molnar
2007-01-24 20:09 ` Daniel Walker
2007-01-24 20:13 ` Ingo Molnar
2007-01-24 19:57 ` john stultz
2007-01-24 20:51 ` Daniel Walker
2007-01-24 21:23 ` john stultz
2007-01-24 21:37 ` Daniel Walker
2007-01-25 6:10 ` Ingo Molnar
2007-01-25 6:37 ` Ingo Molnar
2007-01-25 6:32 ` Ingo Molnar
2007-01-25 16:38 ` Daniel Walker
2007-01-28 2:17 ` Andrew Morton
2007-01-29 21:31 ` john stultz
2007-01-29 21:45 ` john stultz
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070123211204.388919000@localhost.localdomain \
--to=tglx@linutronix.de \
--cc=akpm@osdl.org \
--cc=arjan@infradead.org \
--cc=johnstul@us.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=zippel@linux-m68k.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.