* [RFC][PATCH] sched_clock_cpu()
@ 2008-05-03 16:29 Peter Zijlstra
2008-05-03 16:29 ` Peter Zijlstra
` (3 more replies)
0 siblings, 4 replies; 18+ messages in thread
From: Peter Zijlstra @ 2008-05-03 16:29 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, David Miller, Guillaume Chazarain,
Andi Kleen
Cc: linux-kernel, linux-arch, Mike Galbraith, Dhaval Giani
Hi,
This is my current proposal to replace the rq->clock stuff (and possibly
cpu_clock()).
it _DOESN'T_ boot ;-/ and I seem to have caught a flu that makes my
whole body hurt like hell, so I'm not getting anything done.
Brain dump before I crash:
- architectures that have a 'perfect' hardware clock can set
CONFIG_HAVE_STABLE_CLOCK
- the 'jiffie' window might be superfluous when we update tick_gtod
before the __update_sched_clock() call in sched_clock_tick()
- cpu_clock() might be implemented as:
sched_clock_cpu(smp_processor_id())
if the accuracy proves good enough - how far can TSC drift in a
single jiffie when considering the filtering and idle hooks?
- what other architectures besides x86 would need this?
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/sched.h | 29 ++++++
init/main.c | 1
kernel/Makefile | 2
kernel/sched.c | 163 ++----------------------------------
kernel/sched_clock.c | 221 ++++++++++++++++++++++++++++++++++++++++++++++++++
kernel/sched_debug.c | 7 -
kernel/sched_fair.c | 2
7 files changed, 266 insertions(+), 159 deletions(-)
Index: linux-2.6-2/kernel/sched.c
===================================================================
--- linux-2.6-2.orig/kernel/sched.c
+++ linux-2.6-2/kernel/sched.c
@@ -75,16 +75,6 @@
#include <asm/irq_regs.h>
/*
- * Scheduler clock - returns current time in nanosec units.
- * This is default implementation.
- * Architectures and sub-architectures can override this.
- */
-unsigned long long __attribute__((weak)) sched_clock(void)
-{
- return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
-}
-
-/*
* Convert user-nice values [ -20 ... 0 ... 19 ]
* to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
* and back.
@@ -560,13 +550,7 @@ struct rq {
unsigned long next_balance;
struct mm_struct *prev_mm;
- u64 clock, prev_clock_raw;
- s64 clock_max_delta;
-
- unsigned int clock_warps, clock_overflows, clock_underflows;
- u64 idle_clock;
- unsigned int clock_deep_idle_events;
- u64 tick_timestamp;
+ u64 clock;
atomic_t nr_iowait;
@@ -631,82 +615,6 @@ static inline int cpu_of(struct rq *rq)
#endif
}
-#ifdef CONFIG_NO_HZ
-static inline bool nohz_on(int cpu)
-{
- return tick_get_tick_sched(cpu)->nohz_mode != NOHZ_MODE_INACTIVE;
-}
-
-static inline u64 max_skipped_ticks(struct rq *rq)
-{
- return nohz_on(cpu_of(rq)) ? jiffies - rq->last_tick_seen + 2 : 1;
-}
-
-static inline void update_last_tick_seen(struct rq *rq)
-{
- rq->last_tick_seen = jiffies;
-}
-#else
-static inline u64 max_skipped_ticks(struct rq *rq)
-{
- return 1;
-}
-
-static inline void update_last_tick_seen(struct rq *rq)
-{
-}
-#endif
-
-/*
- * Update the per-runqueue clock, as finegrained as the platform can give
- * us, but without assuming monotonicity, etc.:
- */
-static void __update_rq_clock(struct rq *rq)
-{
- u64 prev_raw = rq->prev_clock_raw;
- u64 now = sched_clock();
- s64 delta = now - prev_raw;
- u64 clock = rq->clock;
-
-#ifdef CONFIG_SCHED_DEBUG
- WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
-#endif
- /*
- * Protect against sched_clock() occasionally going backwards:
- */
- if (unlikely(delta < 0)) {
- clock++;
- rq->clock_warps++;
- } else {
- /*
- * Catch too large forward jumps too:
- */
- u64 max_jump = max_skipped_ticks(rq) * TICK_NSEC;
- u64 max_time = rq->tick_timestamp + max_jump;
-
- if (unlikely(clock + delta > max_time)) {
- if (clock < max_time)
- clock = max_time;
- else
- clock++;
- rq->clock_overflows++;
- } else {
- if (unlikely(delta > rq->clock_max_delta))
- rq->clock_max_delta = delta;
- clock += delta;
- }
- }
-
- rq->prev_clock_raw = now;
- rq->clock = clock;
-}
-
-static void update_rq_clock(struct rq *rq)
-{
- if (likely(smp_processor_id() == cpu_of(rq)))
- __update_rq_clock(rq);
-}
-
/*
* The domain tree (rq->sd) is protected by RCU's quiescent state transition.
* See detach_destroy_domains: synchronize_sched for details.
@@ -722,6 +630,11 @@ static void update_rq_clock(struct rq *r
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
+static inline void update_rq_clock(struct rq *rq)
+{
+ rq->clock = sched_clock_cpu(cpu_of(rq));
+}
+
/*
* Tunables that become constants when CONFIG_SCHED_DEBUG is off:
*/
@@ -935,7 +848,6 @@ static unsigned long long __cpu_clock(in
{
unsigned long long now;
unsigned long flags;
- struct rq *rq;
/*
* Only call sched_clock() if the scheduler has already been
@@ -945,9 +857,7 @@ static unsigned long long __cpu_clock(in
return 0;
local_irq_save(flags);
- rq = cpu_rq(cpu);
- update_rq_clock(rq);
- now = rq->clock;
+ now = sched_clock_cpu(cpu);
local_irq_restore(flags);
return now;
@@ -1117,43 +1027,6 @@ static struct rq *this_rq_lock(void)
return rq;
}
-/*
- * We are going deep-idle (irqs are disabled):
- */
-void sched_clock_idle_sleep_event(void)
-{
- struct rq *rq = cpu_rq(smp_processor_id());
-
- spin_lock(&rq->lock);
- __update_rq_clock(rq);
- spin_unlock(&rq->lock);
- rq->clock_deep_idle_events++;
-}
-EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
-
-/*
- * We just idled delta nanoseconds (called with irqs disabled):
- */
-void sched_clock_idle_wakeup_event(u64 delta_ns)
-{
- struct rq *rq = cpu_rq(smp_processor_id());
- u64 now = sched_clock();
-
- rq->idle_clock += delta_ns;
- /*
- * Override the previous timestamp and ignore all
- * sched_clock() deltas that occured while we idled,
- * and use the PM-provided delta_ns to advance the
- * rq clock:
- */
- spin_lock(&rq->lock);
- rq->prev_clock_raw = now;
- rq->clock += delta_ns;
- spin_unlock(&rq->lock);
- touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
-
static void __resched_task(struct task_struct *p, int tif_bit);
static inline void resched_task(struct task_struct *p)
@@ -1275,7 +1148,7 @@ static enum hrtimer_restart hrtick(struc
WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
spin_lock(&rq->lock);
- __update_rq_clock(rq);
+ update_rq_clock(rq);
rq->curr->sched_class->task_tick(rq, rq->curr, 1);
spin_unlock(&rq->lock);
@@ -4404,19 +4277,11 @@ void scheduler_tick(void)
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
struct task_struct *curr = rq->curr;
- u64 next_tick = rq->tick_timestamp + TICK_NSEC;
+
+ sched_clock_tick();
spin_lock(&rq->lock);
- __update_rq_clock(rq);
- /*
- * Let rq->clock advance by at least TICK_NSEC:
- */
- if (unlikely(rq->clock < next_tick)) {
- rq->clock = next_tick;
- rq->clock_underflows++;
- }
- rq->tick_timestamp = rq->clock;
- update_last_tick_seen(rq);
+ update_rq_clock(rq);
update_cpu_load(rq);
curr->sched_class->task_tick(rq, curr, 0);
spin_unlock(&rq->lock);
@@ -4570,7 +4435,7 @@ need_resched_nonpreemptible:
* Do the rq-clock update outside the rq lock:
*/
local_irq_disable();
- __update_rq_clock(rq);
+ update_rq_clock(rq);
spin_lock(&rq->lock);
clear_tsk_need_resched(prev);
@@ -8149,8 +8014,6 @@ void __init sched_init(void)
spin_lock_init(&rq->lock);
lockdep_set_class(&rq->lock, &rq->rq_lock_key);
rq->nr_running = 0;
- rq->clock = 1;
- update_last_tick_seen(rq);
init_cfs_rq(&rq->cfs, rq);
init_rt_rq(&rq->rt, rq);
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -8294,6 +8157,7 @@ EXPORT_SYMBOL(__might_sleep);
static void normalize_task(struct rq *rq, struct task_struct *p)
{
int on_rq;
+
update_rq_clock(rq);
on_rq = p->se.on_rq;
if (on_rq)
@@ -8325,7 +8189,6 @@ void normalize_rt_tasks(void)
p->se.sleep_start = 0;
p->se.block_start = 0;
#endif
- task_rq(p)->clock = 0;
if (!rt_task(p)) {
/*
Index: linux-2.6-2/kernel/sched_clock.c
===================================================================
--- /dev/null
+++ linux-2.6-2/kernel/sched_clock.c
@@ -0,0 +1,221 @@
+/*
+ * sched_clock for unstable cpu clocks
+ *
+ * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Based on code by:
+ * Ingo Molnar <mingo@redhat.com>
+ * Guillaume Chazarain <guichaz@gmail.com>
+ *
+ * Create a semi stable clock from a mixture of other events, including:
+ * - gtod
+ * - jiffies
+ * - sched_clock()
+ * - explicit idle events
+ *
+ * We use gtod as base and the unstable clock deltas. The deltas are filtered,
+ * making it monotonic and keeping it within an expected window. This window
+ * is set up using jiffies.
+ *
+ * Furthermore, explicit sleep and wakeup hooks allow us to account for time
+ * that is otherwise invisible (TSC gets stopped).
+ *
+ * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
+ * consistent between cpus (never more than 1 jiffies difference).
+ */
+#include <linux/sched.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
+#include <linux/ktime.h>
+#include <linux/module.h>
+
+
+#ifndef CONFIG_HAVE_STABLE_CLOCK
+
+struct sched_clock_data {
+ spinlock_t lock;
+ unsigned long prev_jiffies;
+ u64 prev_raw;
+ u64 tick_raw;
+ u64 tick_gtod;
+ u64 clock;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
+
+static inline struct sched_clock_data *this_scd(void)
+{
+ return &__get_cpu_var(sched_clock_data);
+}
+
+static inline struct sched_clock_data *cpu_sdc(int cpu)
+{
+ return &per_cpu(sched_clock_data, cpu);
+}
+
+void sched_clock_init(void)
+{
+ u64 ktime_now = ktime_to_ns(ktime_get());
+ u64 now = 0; // sched_clock();
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct sched_clock_data *scd = cpu_sdc(cpu);
+
+ spin_lock_init(&scd->lock);
+ scd->prev_jiffies = jiffies;
+ scd->prev_raw = now;
+ scd->tick_raw = now;
+ scd->tick_gtod = ktime_now;
+ scd->clock = ktime_now;
+ }
+}
+
+/*
+ * update the percpu scd from the raw @now value
+ *
+ * - filter out backward motion
+ * - use jiffies to generate a min,max window to clip the raw values
+ */
+static void __update_sched_clock(struct sched_clock_data *scd, u64 now)
+{
+ unsigned long now_jiffies = jiffies;
+ long delta_jiffies = now_jiffies - scd->prev_jiffies;
+ u64 clock = scd->clock;
+ u64 min_clock, max_clock;
+ s64 delta = now - scd->prev_raw;
+
+ min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;
+
+ if (unlikely(delta < 0)) {
+ clock++;
+ goto out;
+ }
+
+ max_clock = min_clock + TICK_NSEC;
+
+ if (unlikely(clock + delta > max_clock)) {
+ if (clock < max_clock)
+ clock = max_clock;
+ else
+ clock++;
+ } else {
+ clock += delta;
+ }
+
+ out:
+ if (unlikely(clock < min_clock))
+ clock = min_clock;
+
+ scd->prev_raw = now;
+ scd->prev_jiffies = now_jiffies;
+ scd->clock = clock;
+}
+
+static void lock_double_clock(struct sched_clock_data *data1,
+ struct sched_clock_data *data2)
+{
+ if (data1 < data2) {
+ spin_lock(&data1->lock);
+ spin_lock_nested(&data2->lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock(&data2->lock);
+ spin_lock_nested(&data1->lock, SINGLE_DEPTH_NESTING);
+ }
+}
+
+u64 sched_clock_cpu(int cpu)
+{
+ struct sched_clock_data *scd = cpu_sdc(cpu);
+ u64 now = sched_clock();
+ u64 clock;
+
+ if (cpu != smp_processor_id()) {
+ /*
+ * in order to update a remote cpu's clock based on our
+ * unstable raw time rebase it against:
+ * tick_raw (offset between raw counters)
+ * tick_gotd (tick offset between cpus)
+ */
+ struct sched_clock_data *my_scd = this_scd();
+
+ lock_double_clock(scd, my_scd);
+
+ now -= my_scd->tick_raw;
+ now += scd->tick_raw;
+
+ now -= my_scd->tick_gtod;
+ now += scd->tick_gtod;
+
+ spin_unlock(&my_scd->lock);
+ } else
+ spin_lock(&scd->lock);
+
+ __update_sched_clock(scd, now);
+ clock = scd->clock;
+
+ spin_unlock(&scd->lock);
+
+ return clock;
+}
+
+void sched_clock_tick(void)
+{
+ struct sched_clock_data *scd = this_scd();
+ u64 now;
+
+ spin_lock(&scd->lock);
+ now = sched_clock();
+ __update_sched_clock(scd, now);
+ /*
+ * update tick_gtod after __update_sched_clock() because that will
+ * already observe 1 new jiffy; adding a new tick_gtod to that would
+ * increase the clock 2 jiffies.
+ */
+ scd->tick_raw = now;
+ scd->tick_gtod = ktime_to_ns(ktime_get()); // XXX get from regular tick
+ spin_unlock(&scd->lock);
+}
+
+/*
+ * We are going deep-idle (irqs are disabled):
+ */
+void sched_clock_idle_sleep_event(void)
+{
+ sched_clock_cpu(smp_processor_id());
+}
+EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
+
+/*
+ * We just idled delta nanoseconds (called with irqs disabled):
+ */
+void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+ struct sched_clock_data *scd = this_scd();
+
+ /*
+ * Override the previous timestamp and ignore all
+ * sched_clock() deltas that occured while we idled,
+ * and use the PM-provided delta_ns to advance the
+ * rq clock:
+ */
+ spin_lock(&scd->lock);
+ scd->prev_raw = sched_clock();
+ scd->clock += delta_ns;
+ spin_unlock(&scd->lock);
+
+ touch_softlockup_watchdog();
+}
+EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
+
+#endif
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ * This is default implementation.
+ * Architectures and sub-architectures can override this.
+ */
+unsigned long long __attribute__((weak)) sched_clock(void)
+{
+ return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
+}
Index: linux-2.6-2/include/linux/sched.h
===================================================================
--- linux-2.6-2.orig/include/linux/sched.h
+++ linux-2.6-2/include/linux/sched.h
@@ -1551,6 +1551,35 @@ static inline int set_cpus_allowed(struc
extern unsigned long long sched_clock(void);
+#ifdef CONFIG_HAVE_STABLE_CLOCK
+static inline void sched_clock_init(void)
+{
+}
+
+static inline u64 sched_clock_cpu(int cpu)
+{
+ return sched_clock();
+}
+
+static inline void sched_clock_tick(void)
+{
+}
+
+static inline void sched_clock_idle_sleep_event(void)
+{
+}
+
+static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+}
+#else
+extern void sched_clock_init(void);
+extern u64 sched_clock_cpu(int cpu);
+extern void sched_clock_tick(void);
+extern void sched_clock_idle_sleep_event(void);
+extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+#endif
+
/*
* For kernel-internal use: high-speed (but slightly incorrect) per-cpu
* clock constructed from sched_clock():
Index: linux-2.6-2/kernel/Makefile
===================================================================
--- linux-2.6-2.orig/kernel/Makefile
+++ linux-2.6-2/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y = sched.o fork.o exec_domain.o
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
- notifier.o ksysfs.o pm_qos_params.o
+ notifier.o ksysfs.o pm_qos_params.o sched_clock.o
obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
Index: linux-2.6-2/kernel/sched_debug.c
===================================================================
--- linux-2.6-2.orig/kernel/sched_debug.c
+++ linux-2.6-2/kernel/sched_debug.c
@@ -204,13 +204,6 @@ static void print_cpu(struct seq_file *m
PN(next_balance);
P(curr->pid);
PN(clock);
- PN(idle_clock);
- PN(prev_clock_raw);
- P(clock_warps);
- P(clock_overflows);
- P(clock_underflows);
- P(clock_deep_idle_events);
- PN(clock_max_delta);
P(cpu_load[0]);
P(cpu_load[1]);
P(cpu_load[2]);
Index: linux-2.6-2/kernel/sched_fair.c
===================================================================
--- linux-2.6-2.orig/kernel/sched_fair.c
+++ linux-2.6-2/kernel/sched_fair.c
@@ -957,7 +957,7 @@ static void yield_task_fair(struct rq *r
return;
if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
- __update_rq_clock(rq);
+ update_rq_clock(rq);
/*
* Update run-time statistics of the 'current'.
*/
Index: linux-2.6-2/init/main.c
===================================================================
--- linux-2.6-2.orig/init/main.c
+++ linux-2.6-2/init/main.c
@@ -602,6 +602,7 @@ asmlinkage void __init start_kernel(void
softirq_init();
timekeeping_init();
time_init();
+ sched_clock_init();
profile_init();
if (!irqs_disabled())
printk("start_kernel(): bug: interrupts were enabled early\n");
^ permalink raw reply [flat|nested] 18+ messages in thread* [RFC][PATCH] sched_clock_cpu()
2008-05-03 16:29 [RFC][PATCH] sched_clock_cpu() Peter Zijlstra
@ 2008-05-03 16:29 ` Peter Zijlstra
2008-05-03 16:37 ` Ingo Molnar
` (2 subsequent siblings)
3 siblings, 0 replies; 18+ messages in thread
From: Peter Zijlstra @ 2008-05-03 16:29 UTC (permalink / raw)
To: Ingo Molnar, Thomas Gleixner, David Miller, Guillaume Chazarain,
Andi Kleen
Cc: linux-kernel, linux-arch, Mike Galbraith, Dhaval Giani
Hi,
This is my current proposal to replace the rq->clock stuff (and possibly
cpu_clock()).
it _DOESN'T_ boot ;-/ and I seem to have caught a flu that makes my
whole body hurt like hell, so I'm not getting anything done.
Brain dump before I crash:
- architectures that have a 'perfect' hardware clock can set
CONFIG_HAVE_STABLE_CLOCK
- the 'jiffie' window might be superfluous when we update tick_gtod
before the __update_sched_clock() call in sched_clock_tick()
- cpu_clock() might be implemented as:
sched_clock_cpu(smp_processor_id())
if the accuracy proves good enough - how far can TSC drift in a
single jiffie when considering the filtering and idle hooks?
- what other architectures besides x86 would need this?
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/sched.h | 29 ++++++
init/main.c | 1
kernel/Makefile | 2
kernel/sched.c | 163 ++----------------------------------
kernel/sched_clock.c | 221 ++++++++++++++++++++++++++++++++++++++++++++++++++
kernel/sched_debug.c | 7 -
kernel/sched_fair.c | 2
7 files changed, 266 insertions(+), 159 deletions(-)
Index: linux-2.6-2/kernel/sched.c
===================================================================
--- linux-2.6-2.orig/kernel/sched.c
+++ linux-2.6-2/kernel/sched.c
@@ -75,16 +75,6 @@
#include <asm/irq_regs.h>
/*
- * Scheduler clock - returns current time in nanosec units.
- * This is default implementation.
- * Architectures and sub-architectures can override this.
- */
-unsigned long long __attribute__((weak)) sched_clock(void)
-{
- return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
-}
-
-/*
* Convert user-nice values [ -20 ... 0 ... 19 ]
* to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
* and back.
@@ -560,13 +550,7 @@ struct rq {
unsigned long next_balance;
struct mm_struct *prev_mm;
- u64 clock, prev_clock_raw;
- s64 clock_max_delta;
-
- unsigned int clock_warps, clock_overflows, clock_underflows;
- u64 idle_clock;
- unsigned int clock_deep_idle_events;
- u64 tick_timestamp;
+ u64 clock;
atomic_t nr_iowait;
@@ -631,82 +615,6 @@ static inline int cpu_of(struct rq *rq)
#endif
}
-#ifdef CONFIG_NO_HZ
-static inline bool nohz_on(int cpu)
-{
- return tick_get_tick_sched(cpu)->nohz_mode != NOHZ_MODE_INACTIVE;
-}
-
-static inline u64 max_skipped_ticks(struct rq *rq)
-{
- return nohz_on(cpu_of(rq)) ? jiffies - rq->last_tick_seen + 2 : 1;
-}
-
-static inline void update_last_tick_seen(struct rq *rq)
-{
- rq->last_tick_seen = jiffies;
-}
-#else
-static inline u64 max_skipped_ticks(struct rq *rq)
-{
- return 1;
-}
-
-static inline void update_last_tick_seen(struct rq *rq)
-{
-}
-#endif
-
-/*
- * Update the per-runqueue clock, as finegrained as the platform can give
- * us, but without assuming monotonicity, etc.:
- */
-static void __update_rq_clock(struct rq *rq)
-{
- u64 prev_raw = rq->prev_clock_raw;
- u64 now = sched_clock();
- s64 delta = now - prev_raw;
- u64 clock = rq->clock;
-
-#ifdef CONFIG_SCHED_DEBUG
- WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
-#endif
- /*
- * Protect against sched_clock() occasionally going backwards:
- */
- if (unlikely(delta < 0)) {
- clock++;
- rq->clock_warps++;
- } else {
- /*
- * Catch too large forward jumps too:
- */
- u64 max_jump = max_skipped_ticks(rq) * TICK_NSEC;
- u64 max_time = rq->tick_timestamp + max_jump;
-
- if (unlikely(clock + delta > max_time)) {
- if (clock < max_time)
- clock = max_time;
- else
- clock++;
- rq->clock_overflows++;
- } else {
- if (unlikely(delta > rq->clock_max_delta))
- rq->clock_max_delta = delta;
- clock += delta;
- }
- }
-
- rq->prev_clock_raw = now;
- rq->clock = clock;
-}
-
-static void update_rq_clock(struct rq *rq)
-{
- if (likely(smp_processor_id() == cpu_of(rq)))
- __update_rq_clock(rq);
-}
-
/*
* The domain tree (rq->sd) is protected by RCU's quiescent state transition.
* See detach_destroy_domains: synchronize_sched for details.
@@ -722,6 +630,11 @@ static void update_rq_clock(struct rq *r
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
+static inline void update_rq_clock(struct rq *rq)
+{
+ rq->clock = sched_clock_cpu(cpu_of(rq));
+}
+
/*
* Tunables that become constants when CONFIG_SCHED_DEBUG is off:
*/
@@ -935,7 +848,6 @@ static unsigned long long __cpu_clock(in
{
unsigned long long now;
unsigned long flags;
- struct rq *rq;
/*
* Only call sched_clock() if the scheduler has already been
@@ -945,9 +857,7 @@ static unsigned long long __cpu_clock(in
return 0;
local_irq_save(flags);
- rq = cpu_rq(cpu);
- update_rq_clock(rq);
- now = rq->clock;
+ now = sched_clock_cpu(cpu);
local_irq_restore(flags);
return now;
@@ -1117,43 +1027,6 @@ static struct rq *this_rq_lock(void)
return rq;
}
-/*
- * We are going deep-idle (irqs are disabled):
- */
-void sched_clock_idle_sleep_event(void)
-{
- struct rq *rq = cpu_rq(smp_processor_id());
-
- spin_lock(&rq->lock);
- __update_rq_clock(rq);
- spin_unlock(&rq->lock);
- rq->clock_deep_idle_events++;
-}
-EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
-
-/*
- * We just idled delta nanoseconds (called with irqs disabled):
- */
-void sched_clock_idle_wakeup_event(u64 delta_ns)
-{
- struct rq *rq = cpu_rq(smp_processor_id());
- u64 now = sched_clock();
-
- rq->idle_clock += delta_ns;
- /*
- * Override the previous timestamp and ignore all
- * sched_clock() deltas that occured while we idled,
- * and use the PM-provided delta_ns to advance the
- * rq clock:
- */
- spin_lock(&rq->lock);
- rq->prev_clock_raw = now;
- rq->clock += delta_ns;
- spin_unlock(&rq->lock);
- touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
-
static void __resched_task(struct task_struct *p, int tif_bit);
static inline void resched_task(struct task_struct *p)
@@ -1275,7 +1148,7 @@ static enum hrtimer_restart hrtick(struc
WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
spin_lock(&rq->lock);
- __update_rq_clock(rq);
+ update_rq_clock(rq);
rq->curr->sched_class->task_tick(rq, rq->curr, 1);
spin_unlock(&rq->lock);
@@ -4404,19 +4277,11 @@ void scheduler_tick(void)
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
struct task_struct *curr = rq->curr;
- u64 next_tick = rq->tick_timestamp + TICK_NSEC;
+
+ sched_clock_tick();
spin_lock(&rq->lock);
- __update_rq_clock(rq);
- /*
- * Let rq->clock advance by at least TICK_NSEC:
- */
- if (unlikely(rq->clock < next_tick)) {
- rq->clock = next_tick;
- rq->clock_underflows++;
- }
- rq->tick_timestamp = rq->clock;
- update_last_tick_seen(rq);
+ update_rq_clock(rq);
update_cpu_load(rq);
curr->sched_class->task_tick(rq, curr, 0);
spin_unlock(&rq->lock);
@@ -4570,7 +4435,7 @@ need_resched_nonpreemptible:
* Do the rq-clock update outside the rq lock:
*/
local_irq_disable();
- __update_rq_clock(rq);
+ update_rq_clock(rq);
spin_lock(&rq->lock);
clear_tsk_need_resched(prev);
@@ -8149,8 +8014,6 @@ void __init sched_init(void)
spin_lock_init(&rq->lock);
lockdep_set_class(&rq->lock, &rq->rq_lock_key);
rq->nr_running = 0;
- rq->clock = 1;
- update_last_tick_seen(rq);
init_cfs_rq(&rq->cfs, rq);
init_rt_rq(&rq->rt, rq);
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -8294,6 +8157,7 @@ EXPORT_SYMBOL(__might_sleep);
static void normalize_task(struct rq *rq, struct task_struct *p)
{
int on_rq;
+
update_rq_clock(rq);
on_rq = p->se.on_rq;
if (on_rq)
@@ -8325,7 +8189,6 @@ void normalize_rt_tasks(void)
p->se.sleep_start = 0;
p->se.block_start = 0;
#endif
- task_rq(p)->clock = 0;
if (!rt_task(p)) {
/*
Index: linux-2.6-2/kernel/sched_clock.c
===================================================================
--- /dev/null
+++ linux-2.6-2/kernel/sched_clock.c
@@ -0,0 +1,221 @@
+/*
+ * sched_clock for unstable cpu clocks
+ *
+ * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Based on code by:
+ * Ingo Molnar <mingo@redhat.com>
+ * Guillaume Chazarain <guichaz@gmail.com>
+ *
+ * Create a semi stable clock from a mixture of other events, including:
+ * - gtod
+ * - jiffies
+ * - sched_clock()
+ * - explicit idle events
+ *
+ * We use gtod as base and the unstable clock deltas. The deltas are filtered,
+ * making it monotonic and keeping it within an expected window. This window
+ * is set up using jiffies.
+ *
+ * Furthermore, explicit sleep and wakeup hooks allow us to account for time
+ * that is otherwise invisible (TSC gets stopped).
+ *
+ * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
+ * consistent between cpus (never more than 1 jiffies difference).
+ */
+#include <linux/sched.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
+#include <linux/ktime.h>
+#include <linux/module.h>
+
+
+#ifndef CONFIG_HAVE_STABLE_CLOCK
+
+struct sched_clock_data {
+ spinlock_t lock;
+ unsigned long prev_jiffies;
+ u64 prev_raw;
+ u64 tick_raw;
+ u64 tick_gtod;
+ u64 clock;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
+
+static inline struct sched_clock_data *this_scd(void)
+{
+ return &__get_cpu_var(sched_clock_data);
+}
+
+static inline struct sched_clock_data *cpu_sdc(int cpu)
+{
+ return &per_cpu(sched_clock_data, cpu);
+}
+
+void sched_clock_init(void)
+{
+ u64 ktime_now = ktime_to_ns(ktime_get());
+ u64 now = 0; // sched_clock();
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct sched_clock_data *scd = cpu_sdc(cpu);
+
+ spin_lock_init(&scd->lock);
+ scd->prev_jiffies = jiffies;
+ scd->prev_raw = now;
+ scd->tick_raw = now;
+ scd->tick_gtod = ktime_now;
+ scd->clock = ktime_now;
+ }
+}
+
+/*
+ * update the percpu scd from the raw @now value
+ *
+ * - filter out backward motion
+ * - use jiffies to generate a min,max window to clip the raw values
+ */
+static void __update_sched_clock(struct sched_clock_data *scd, u64 now)
+{
+ unsigned long now_jiffies = jiffies;
+ long delta_jiffies = now_jiffies - scd->prev_jiffies;
+ u64 clock = scd->clock;
+ u64 min_clock, max_clock;
+ s64 delta = now - scd->prev_raw;
+
+ min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;
+
+ if (unlikely(delta < 0)) {
+ clock++;
+ goto out;
+ }
+
+ max_clock = min_clock + TICK_NSEC;
+
+ if (unlikely(clock + delta > max_clock)) {
+ if (clock < max_clock)
+ clock = max_clock;
+ else
+ clock++;
+ } else {
+ clock += delta;
+ }
+
+ out:
+ if (unlikely(clock < min_clock))
+ clock = min_clock;
+
+ scd->prev_raw = now;
+ scd->prev_jiffies = now_jiffies;
+ scd->clock = clock;
+}
+
+static void lock_double_clock(struct sched_clock_data *data1,
+ struct sched_clock_data *data2)
+{
+ if (data1 < data2) {
+ spin_lock(&data1->lock);
+ spin_lock_nested(&data2->lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock(&data2->lock);
+ spin_lock_nested(&data1->lock, SINGLE_DEPTH_NESTING);
+ }
+}
+
+u64 sched_clock_cpu(int cpu)
+{
+ struct sched_clock_data *scd = cpu_sdc(cpu);
+ u64 now = sched_clock();
+ u64 clock;
+
+ if (cpu != smp_processor_id()) {
+ /*
+ * in order to update a remote cpu's clock based on our
+ * unstable raw time rebase it against:
+ * tick_raw (offset between raw counters)
+ * tick_gotd (tick offset between cpus)
+ */
+ struct sched_clock_data *my_scd = this_scd();
+
+ lock_double_clock(scd, my_scd);
+
+ now -= my_scd->tick_raw;
+ now += scd->tick_raw;
+
+ now -= my_scd->tick_gtod;
+ now += scd->tick_gtod;
+
+ spin_unlock(&my_scd->lock);
+ } else
+ spin_lock(&scd->lock);
+
+ __update_sched_clock(scd, now);
+ clock = scd->clock;
+
+ spin_unlock(&scd->lock);
+
+ return clock;
+}
+
+void sched_clock_tick(void)
+{
+ struct sched_clock_data *scd = this_scd();
+ u64 now;
+
+ spin_lock(&scd->lock);
+ now = sched_clock();
+ __update_sched_clock(scd, now);
+ /*
+ * update tick_gtod after __update_sched_clock() because that will
+ * already observe 1 new jiffy; adding a new tick_gtod to that would
+ * increase the clock 2 jiffies.
+ */
+ scd->tick_raw = now;
+ scd->tick_gtod = ktime_to_ns(ktime_get()); // XXX get from regular tick
+ spin_unlock(&scd->lock);
+}
+
+/*
+ * We are going deep-idle (irqs are disabled):
+ */
+void sched_clock_idle_sleep_event(void)
+{
+ sched_clock_cpu(smp_processor_id());
+}
+EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
+
+/*
+ * We just idled delta nanoseconds (called with irqs disabled):
+ */
+void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+ struct sched_clock_data *scd = this_scd();
+
+ /*
+ * Override the previous timestamp and ignore all
+ * sched_clock() deltas that occured while we idled,
+ * and use the PM-provided delta_ns to advance the
+ * rq clock:
+ */
+ spin_lock(&scd->lock);
+ scd->prev_raw = sched_clock();
+ scd->clock += delta_ns;
+ spin_unlock(&scd->lock);
+
+ touch_softlockup_watchdog();
+}
+EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
+
+#endif
+
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ * This is default implementation.
+ * Architectures and sub-architectures can override this.
+ */
+unsigned long long __attribute__((weak)) sched_clock(void)
+{
+ return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
+}
Index: linux-2.6-2/include/linux/sched.h
===================================================================
--- linux-2.6-2.orig/include/linux/sched.h
+++ linux-2.6-2/include/linux/sched.h
@@ -1551,6 +1551,35 @@ static inline int set_cpus_allowed(struc
extern unsigned long long sched_clock(void);
+#ifdef CONFIG_HAVE_STABLE_CLOCK
+static inline void sched_clock_init(void)
+{
+}
+
+static inline u64 sched_clock_cpu(int cpu)
+{
+ return sched_clock();
+}
+
+static inline void sched_clock_tick(void)
+{
+}
+
+static inline void sched_clock_idle_sleep_event(void)
+{
+}
+
+static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+}
+#else
+extern void sched_clock_init(void);
+extern u64 sched_clock_cpu(int cpu);
+extern void sched_clock_tick(void);
+extern void sched_clock_idle_sleep_event(void);
+extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+#endif
+
/*
* For kernel-internal use: high-speed (but slightly incorrect) per-cpu
* clock constructed from sched_clock():
Index: linux-2.6-2/kernel/Makefile
===================================================================
--- linux-2.6-2.orig/kernel/Makefile
+++ linux-2.6-2/kernel/Makefile
@@ -9,7 +9,7 @@ obj-y = sched.o fork.o exec_domain.o
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
- notifier.o ksysfs.o pm_qos_params.o
+ notifier.o ksysfs.o pm_qos_params.o sched_clock.o
obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
Index: linux-2.6-2/kernel/sched_debug.c
===================================================================
--- linux-2.6-2.orig/kernel/sched_debug.c
+++ linux-2.6-2/kernel/sched_debug.c
@@ -204,13 +204,6 @@ static void print_cpu(struct seq_file *m
PN(next_balance);
P(curr->pid);
PN(clock);
- PN(idle_clock);
- PN(prev_clock_raw);
- P(clock_warps);
- P(clock_overflows);
- P(clock_underflows);
- P(clock_deep_idle_events);
- PN(clock_max_delta);
P(cpu_load[0]);
P(cpu_load[1]);
P(cpu_load[2]);
Index: linux-2.6-2/kernel/sched_fair.c
===================================================================
--- linux-2.6-2.orig/kernel/sched_fair.c
+++ linux-2.6-2/kernel/sched_fair.c
@@ -957,7 +957,7 @@ static void yield_task_fair(struct rq *r
return;
if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
- __update_rq_clock(rq);
+ update_rq_clock(rq);
/*
* Update run-time statistics of the 'current'.
*/
Index: linux-2.6-2/init/main.c
===================================================================
--- linux-2.6-2.orig/init/main.c
+++ linux-2.6-2/init/main.c
@@ -602,6 +602,7 @@ asmlinkage void __init start_kernel(void
softirq_init();
timekeeping_init();
time_init();
+ sched_clock_init();
profile_init();
if (!irqs_disabled())
printk("start_kernel(): bug: interrupts were enabled early\n");
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [RFC][PATCH] sched_clock_cpu()
2008-05-03 16:29 [RFC][PATCH] sched_clock_cpu() Peter Zijlstra
2008-05-03 16:29 ` Peter Zijlstra
@ 2008-05-03 16:37 ` Ingo Molnar
2008-05-03 16:37 ` Ingo Molnar
2008-05-03 16:55 ` Ingo Molnar
2008-05-03 17:01 ` Ingo Molnar
3 siblings, 1 reply; 18+ messages in thread
From: Ingo Molnar @ 2008-05-03 16:37 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Thomas Gleixner, David Miller, Guillaume Chazarain, Andi Kleen,
linux-kernel, linux-arch, Mike Galbraith, Dhaval Giani
* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> Hi,
>
> This is my current proposal to replace the rq->clock stuff (and
> possibly cpu_clock()).
>
> it _DOESN'T_ boot ;-/ and I seem to have caught a flu that makes my
> whole body hurt like hell, so I'm not getting anything done.
ouch - get better soon!
> Brain dump before I crash:
>
> - architectures that have a 'perfect' hardware clock can set
> CONFIG_HAVE_STABLE_CLOCK
ok.
> - the 'jiffie' window might be superfluous when we update tick_gtod
> before the __update_sched_clock() call in sched_clock_tick()
>
> - cpu_clock() might be implemented as:
>
> sched_clock_cpu(smp_processor_id())
>
> if the accuracy proves good enough - how far can TSC drift in a
> single jiffie when considering the filtering and idle hooks?
it 'can' be very bad - so we have to assume it's random and fall back to
jiffies quality in that case. In practice on most x86 CPUs it wont drift
that far - even drifting TSCs drift minimally (there are AMD
Athlon64/Opteron CPUs where a CPU in HLT will cause the clock and the
TSC to drift a bit) - and stopping TSCs will just stop.
But in terms of BIOSes trying to fix things up and in terms of cpufreq
artifacts (the CPU's clock itself going through transients) anything can
happen and the code must be robust.
> - what other architectures besides x86 would need this?
we rarely get any interactivity reports from anything non-x86 so i doubt
it truly matters on anything but x86. If it _breaks_ in terms of
crashing or locking we do hear from other architectures ;-)
Ingo
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC][PATCH] sched_clock_cpu()
2008-05-03 16:37 ` Ingo Molnar
@ 2008-05-03 16:37 ` Ingo Molnar
0 siblings, 0 replies; 18+ messages in thread
From: Ingo Molnar @ 2008-05-03 16:37 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Thomas Gleixner, David Miller, Guillaume Chazarain, Andi Kleen,
linux-kernel, linux-arch, Mike Galbraith, Dhaval Giani
* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> Hi,
>
> This is my current proposal to replace the rq->clock stuff (and
> possibly cpu_clock()).
>
> it _DOESN'T_ boot ;-/ and I seem to have caught a flu that makes my
> whole body hurt like hell, so I'm not getting anything done.
ouch - get better soon!
> Brain dump before I crash:
>
> - architectures that have a 'perfect' hardware clock can set
> CONFIG_HAVE_STABLE_CLOCK
ok.
> - the 'jiffie' window might be superfluous when we update tick_gtod
> before the __update_sched_clock() call in sched_clock_tick()
>
> - cpu_clock() might be implemented as:
>
> sched_clock_cpu(smp_processor_id())
>
> if the accuracy proves good enough - how far can TSC drift in a
> single jiffie when considering the filtering and idle hooks?
it 'can' be very bad - so we have to assume it's random and fall back to
jiffies quality in that case. In practice on most x86 CPUs it wont drift
that far - even drifting TSCs drift minimally (there are AMD
Athlon64/Opteron CPUs where a CPU in HLT will cause the clock and the
TSC to drift a bit) - and stopping TSCs will just stop.
But in terms of BIOSes trying to fix things up and in terms of cpufreq
artifacts (the CPU's clock itself going through transients) anything can
happen and the code must be robust.
> - what other architectures besides x86 would need this?
we rarely get any interactivity reports from anything non-x86 so i doubt
it truly matters on anything but x86. If it _breaks_ in terms of
crashing or locking we do hear from other architectures ;-)
Ingo
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC][PATCH] sched_clock_cpu()
2008-05-03 16:29 [RFC][PATCH] sched_clock_cpu() Peter Zijlstra
2008-05-03 16:29 ` Peter Zijlstra
2008-05-03 16:37 ` Ingo Molnar
@ 2008-05-03 16:55 ` Ingo Molnar
2008-05-03 17:30 ` Dhaval Giani
2008-05-03 17:01 ` Ingo Molnar
3 siblings, 1 reply; 18+ messages in thread
From: Ingo Molnar @ 2008-05-03 16:55 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Thomas Gleixner, David Miller, Guillaume Chazarain, Andi Kleen,
linux-kernel, linux-arch, Mike Galbraith, Dhaval Giani
* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> it _DOESN'T_ boot ;-/ [...]
FYI, the merged version against sched-devel.git does boot fine here and
the clock does seem to advance as expected.
Ingo
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC][PATCH] sched_clock_cpu()
2008-05-03 16:55 ` Ingo Molnar
@ 2008-05-03 17:30 ` Dhaval Giani
2008-05-03 17:30 ` Dhaval Giani
` (2 more replies)
0 siblings, 3 replies; 18+ messages in thread
From: Dhaval Giani @ 2008-05-03 17:30 UTC (permalink / raw)
To: Ingo Molnar
Cc: Peter Zijlstra, Thomas Gleixner, David Miller,
Guillaume Chazarain, Andi Kleen, linux-kernel, linux-arch,
Mike Galbraith
On Sat, May 03, 2008 at 06:55:11PM +0200, Ingo Molnar wrote:
>
> * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>
> > it _DOESN'T_ boot ;-/ [...]
>
> FYI, the merged version against sched-devel.git does boot fine here and
> the clock does seem to advance as expected.
>
boots here against linus as well.
> Ingo
--
regards,
Dhaval
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [RFC][PATCH] sched_clock_cpu()
2008-05-03 17:30 ` Dhaval Giani
@ 2008-05-03 17:30 ` Dhaval Giani
2008-05-04 4:16 ` David Miller
2008-05-04 4:38 ` Dhaval Giani
2 siblings, 0 replies; 18+ messages in thread
From: Dhaval Giani @ 2008-05-03 17:30 UTC (permalink / raw)
To: Ingo Molnar
Cc: Peter Zijlstra, Thomas Gleixner, David Miller,
Guillaume Chazarain, Andi Kleen, linux-kernel, linux-arch,
Mike Galbraith
On Sat, May 03, 2008 at 06:55:11PM +0200, Ingo Molnar wrote:
>
> * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>
> > it _DOESN'T_ boot ;-/ [...]
>
> FYI, the merged version against sched-devel.git does boot fine here and
> the clock does seem to advance as expected.
>
boots here against linus as well.
> Ingo
--
regards,
Dhaval
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC][PATCH] sched_clock_cpu()
2008-05-03 17:30 ` Dhaval Giani
2008-05-03 17:30 ` Dhaval Giani
@ 2008-05-04 4:16 ` David Miller
2008-05-04 4:30 ` Dhaval Giani
2008-05-04 4:38 ` Dhaval Giani
2 siblings, 1 reply; 18+ messages in thread
From: David Miller @ 2008-05-04 4:16 UTC (permalink / raw)
To: dhaval
Cc: mingo, a.p.zijlstra, tglx, guichaz, andi, linux-kernel,
linux-arch, efault
From: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Date: Sat, 3 May 2008 23:00:23 +0530
> On Sat, May 03, 2008 at 06:55:11PM +0200, Ingo Molnar wrote:
> >
> > * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> >
> > > it _DOESN'T_ boot ;-/ [...]
> >
> > FYI, the merged version against sched-devel.git does boot fine here and
> > the clock does seem to advance as expected.
> >
>
> boots here against linus as well.
If you tested on powerpc (just guessing), did you set
HAVE_STABLE_CLOCK in arch/powerpc/Kconfig? That's
what you'll need to do to take advantage of the
new code.
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC][PATCH] sched_clock_cpu()
2008-05-04 4:16 ` David Miller
@ 2008-05-04 4:30 ` Dhaval Giani
0 siblings, 0 replies; 18+ messages in thread
From: Dhaval Giani @ 2008-05-04 4:30 UTC (permalink / raw)
To: David Miller
Cc: mingo, a.p.zijlstra, tglx, guichaz, andi, linux-kernel,
linux-arch, efault
On Sat, May 03, 2008 at 09:16:46PM -0700, David Miller wrote:
> From: Dhaval Giani <dhaval@linux.vnet.ibm.com>
> Date: Sat, 3 May 2008 23:00:23 +0530
>
> > On Sat, May 03, 2008 at 06:55:11PM +0200, Ingo Molnar wrote:
> > >
> > > * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> > >
> > > > it _DOESN'T_ boot ;-/ [...]
> > >
> > > FYI, the merged version against sched-devel.git does boot fine here and
> > > the clock does seem to advance as expected.
> > >
> >
> > boots here against linus as well.
>
> If you tested on powerpc (just guessing), did you set
> HAVE_STABLE_CLOCK in arch/powerpc/Kconfig? That's
> what you'll need to do to take advantage of the
> new code.
I was testing on x86. Let me boot it on a powerpc box, and give feedback
(I was more interested in getting it boot the first time and it was
already late :) )
--
regards,
Dhaval
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC][PATCH] sched_clock_cpu()
2008-05-03 17:30 ` Dhaval Giani
2008-05-03 17:30 ` Dhaval Giani
2008-05-04 4:16 ` David Miller
@ 2008-05-04 4:38 ` Dhaval Giani
2008-05-04 4:38 ` Dhaval Giani
2 siblings, 1 reply; 18+ messages in thread
From: Dhaval Giani @ 2008-05-04 4:38 UTC (permalink / raw)
To: Ingo Molnar
Cc: Peter Zijlstra, Thomas Gleixner, David Miller,
Guillaume Chazarain, Andi Kleen, linux-kernel, linux-arch,
Mike Galbraith, jens.axboe
On Sat, May 03, 2008 at 11:00:23PM +0530, Dhaval Giani wrote:
> On Sat, May 03, 2008 at 06:55:11PM +0200, Ingo Molnar wrote:
> >
> > * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> >
> > > it _DOESN'T_ boot ;-/ [...]
> >
> > FYI, the merged version against sched-devel.git does boot fine here and
> > the clock does seem to advance as expected.
> >
>
> boots here against linus as well.
>
Unrelated, it hits this as well,
21.964266] ------------[ cut here ]------------
[ 21.978124] WARNING: at include/linux/blkdev.h:431 blk_queue_init_tags+0x114/0x158()
[ 22.013319] Modules linked in:
[ 22.022518] Pid: 1, comm: swapper Not tainted 2.6.25 #1
[ 22.042741] [<c012601f>] warn_on_slowpath+0x41/0x5f
[ 22.057667] [<c012e2dc>] ? run_timer_softirq+0x158/0x160
[ 22.073948] [<c017ade7>] ? cache_alloc_refill+0x17f/0x1de
[ 22.090485] [<c014119a>] ? __lock_release+0x1e/0x51
[ 22.105463] [<c0179c47>] ? check_poison_obj+0x2a/0x17b
[ 22.121219] [<c017ade7>] ? cache_alloc_refill+0x17f/0x1de
[ 22.137760] [<c0179aa4>] ? poison_obj+0x1e/0x3b
[ 22.151698] [<c0179650>] ? dbg_redzone1+0x15/0x1c
[ 22.166157] [<c017af76>] ? cache_alloc_debugcheck_after+0x130/0x150
[ 22.187398] [<c017b3c5>] ? __kmalloc+0x100/0x122
[ 22.201595] [<c023b103>] ? init_tag_map+0x64/0x8b
[ 22.216055] [<c023b103>] ? init_tag_map+0x64/0x8b
[ 22.230510] [<c023b154>] ? __blk_queue_init_tags+0x2a/0x4e
[ 22.247311] [<c023b29a>] blk_queue_init_tags+0x114/0x158
[ 22.263537] [<c032d6eb>] ahc_platform_set_tags+0x130/0x17e
[ 22.280284] [<c032d82d>] ahc_linux_device_queue_depth+0x7f/0xdf
[ 22.300205] [<c0315d41>] ? scsi_add_lun+0x1f8/0x326
[ 22.316370] [<c032cb2b>] ahc_linux_slave_configure+0x36/0x53
[ 22.335450] [<c0315e24>] scsi_add_lun+0x2db/0x326
[ 22.349856] [<c0315fe1>] scsi_probe_and_add_lun+0x172/0x204
[ 22.367268] [<c03166e0>] __scsi_scan_target+0x86/0xcc
[ 22.382713] [<c03167f9>] scsi_scan_channel+0x3f/0x5e
[ 22.397898] [<c0316890>] scsi_scan_host_selected+0x78/0xa9
[ 22.414645] [<c0316b5c>] do_scsi_scan_host+0x5a/0x63
[ 22.429830] [<c0316bb2>] scsi_scan_host+0x33/0x75
[ 22.444235] [<c032d298>] ahc_linux_register_host+0x190/0x19a
[ 22.461503] [<c0253982>] ? pci_get_slot+0x61/0x68
[ 22.475961] [<c0245a56>] ? kobject_put+0x3c/0x41
[ 22.490158] [<c02c388f>] ? put_device+0x11/0x13
[ 22.504095] [<c025377d>] ? pci_dev_put+0xf/0x12
[ 22.518034] [<c032f9d1>] ahc_linux_pci_dev_probe+0x15a/0x164
[ 22.536537] [<c025349f>] ? pci_match_device+0x8c/0x9b
[ 22.552702] [<c014119a>] ? __lock_release+0x1e/0x51
[ 22.569672] [<c025349f>] ? pci_match_device+0x8c/0x9b
[ 22.584871] [<c0412ec8>] ? _spin_unlock+0x27/0x3c
[ 22.599306] [<c02534bb>] pci_call_probe+0xd/0x10
[ 22.613429] [<c02534ef>] __pci_device_probe+0x31/0x43
[ 22.628848] [<c0253522>] pci_device_probe+0x21/0x34
[ 22.643750] [<c02c53cc>] really_probe+0x74/0xf2
[ 22.657634] [<c02c5494>] driver_probe_device+0x37/0x40
[ 22.673340] [<c02c5549>] __driver_attach+0x3d/0x5f
[ 22.688007] [<c02c47f5>] bus_for_each_dev+0x38/0x5d
[ 22.702933] [<c0245795>] ? kobject_init_and_add+0x20/0x22
[ 22.723240] [<c02c557f>] driver_attach+0x14/0x16
[ 22.737433] [<c02c550c>] ? __driver_attach+0x0/0x5f
[ 22.752410] [<c02c4dc0>] bus_add_driver+0x99/0x149
[ 22.767077] [<c02c5912>] driver_register+0x69/0x8d
[ 22.781955] [<c02536d0>] __pci_register_driver+0x40/0x61
[ 22.798179] [<c05d485e>] ? kernel_init+0x0/0x93
[ 22.812119] [<c032fac0>] ahc_linux_pci_init+0x14/0x16
[ 22.827565] [<c05ef704>] ahc_linux_init+0x58/0x5c
[ 22.841982] [<c05d4737>] do_initcalls+0x59/0x131
[ 22.856142] [<c01af795>] ? create_proc_entry+0x67/0x7b
[ 22.871900] [<c0159447>] ? register_irq_proc+0xa4/0xba
[ 22.887656] [<c01a0000>] ? bio_copy_kern_endio+0x6c/0xa1
[ 22.903935] [<c05d485e>] ? kernel_init+0x0/0x93
[ 22.917872] [<c05d482b>] do_basic_setup+0x1c/0x1e
[ 22.932275] [<c05d48b6>] kernel_init+0x58/0x93
[ 22.945902] [<c010463f>] kernel_thread_helper+0x7/0x10
[ 22.961606] =======================
[ 22.973591] ---[ end trace 441bd607bde65755 ]---
--
regards,
Dhaval
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC][PATCH] sched_clock_cpu()
2008-05-04 4:38 ` Dhaval Giani
@ 2008-05-04 4:38 ` Dhaval Giani
0 siblings, 0 replies; 18+ messages in thread
From: Dhaval Giani @ 2008-05-04 4:38 UTC (permalink / raw)
To: Ingo Molnar
Cc: Peter Zijlstra, Thomas Gleixner, David Miller,
Guillaume Chazarain, Andi Kleen, linux-kernel, linux-arch,
Mike Galbraith, jens.axboe
On Sat, May 03, 2008 at 11:00:23PM +0530, Dhaval Giani wrote:
> On Sat, May 03, 2008 at 06:55:11PM +0200, Ingo Molnar wrote:
> >
> > * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> >
> > > it _DOESN'T_ boot ;-/ [...]
> >
> > FYI, the merged version against sched-devel.git does boot fine here and
> > the clock does seem to advance as expected.
> >
>
> boots here against linus as well.
>
Unrelated, it hits this as well,
21.964266] ------------[ cut here ]------------
[ 21.978124] WARNING: at include/linux/blkdev.h:431 blk_queue_init_tags+0x114/0x158()
[ 22.013319] Modules linked in:
[ 22.022518] Pid: 1, comm: swapper Not tainted 2.6.25 #1
[ 22.042741] [<c012601f>] warn_on_slowpath+0x41/0x5f
[ 22.057667] [<c012e2dc>] ? run_timer_softirq+0x158/0x160
[ 22.073948] [<c017ade7>] ? cache_alloc_refill+0x17f/0x1de
[ 22.090485] [<c014119a>] ? __lock_release+0x1e/0x51
[ 22.105463] [<c0179c47>] ? check_poison_obj+0x2a/0x17b
[ 22.121219] [<c017ade7>] ? cache_alloc_refill+0x17f/0x1de
[ 22.137760] [<c0179aa4>] ? poison_obj+0x1e/0x3b
[ 22.151698] [<c0179650>] ? dbg_redzone1+0x15/0x1c
[ 22.166157] [<c017af76>] ? cache_alloc_debugcheck_after+0x130/0x150
[ 22.187398] [<c017b3c5>] ? __kmalloc+0x100/0x122
[ 22.201595] [<c023b103>] ? init_tag_map+0x64/0x8b
[ 22.216055] [<c023b103>] ? init_tag_map+0x64/0x8b
[ 22.230510] [<c023b154>] ? __blk_queue_init_tags+0x2a/0x4e
[ 22.247311] [<c023b29a>] blk_queue_init_tags+0x114/0x158
[ 22.263537] [<c032d6eb>] ahc_platform_set_tags+0x130/0x17e
[ 22.280284] [<c032d82d>] ahc_linux_device_queue_depth+0x7f/0xdf
[ 22.300205] [<c0315d41>] ? scsi_add_lun+0x1f8/0x326
[ 22.316370] [<c032cb2b>] ahc_linux_slave_configure+0x36/0x53
[ 22.335450] [<c0315e24>] scsi_add_lun+0x2db/0x326
[ 22.349856] [<c0315fe1>] scsi_probe_and_add_lun+0x172/0x204
[ 22.367268] [<c03166e0>] __scsi_scan_target+0x86/0xcc
[ 22.382713] [<c03167f9>] scsi_scan_channel+0x3f/0x5e
[ 22.397898] [<c0316890>] scsi_scan_host_selected+0x78/0xa9
[ 22.414645] [<c0316b5c>] do_scsi_scan_host+0x5a/0x63
[ 22.429830] [<c0316bb2>] scsi_scan_host+0x33/0x75
[ 22.444235] [<c032d298>] ahc_linux_register_host+0x190/0x19a
[ 22.461503] [<c0253982>] ? pci_get_slot+0x61/0x68
[ 22.475961] [<c0245a56>] ? kobject_put+0x3c/0x41
[ 22.490158] [<c02c388f>] ? put_device+0x11/0x13
[ 22.504095] [<c025377d>] ? pci_dev_put+0xf/0x12
[ 22.518034] [<c032f9d1>] ahc_linux_pci_dev_probe+0x15a/0x164
[ 22.536537] [<c025349f>] ? pci_match_device+0x8c/0x9b
[ 22.552702] [<c014119a>] ? __lock_release+0x1e/0x51
[ 22.569672] [<c025349f>] ? pci_match_device+0x8c/0x9b
[ 22.584871] [<c0412ec8>] ? _spin_unlock+0x27/0x3c
[ 22.599306] [<c02534bb>] pci_call_probe+0xd/0x10
[ 22.613429] [<c02534ef>] __pci_device_probe+0x31/0x43
[ 22.628848] [<c0253522>] pci_device_probe+0x21/0x34
[ 22.643750] [<c02c53cc>] really_probe+0x74/0xf2
[ 22.657634] [<c02c5494>] driver_probe_device+0x37/0x40
[ 22.673340] [<c02c5549>] __driver_attach+0x3d/0x5f
[ 22.688007] [<c02c47f5>] bus_for_each_dev+0x38/0x5d
[ 22.702933] [<c0245795>] ? kobject_init_and_add+0x20/0x22
[ 22.723240] [<c02c557f>] driver_attach+0x14/0x16
[ 22.737433] [<c02c550c>] ? __driver_attach+0x0/0x5f
[ 22.752410] [<c02c4dc0>] bus_add_driver+0x99/0x149
[ 22.767077] [<c02c5912>] driver_register+0x69/0x8d
[ 22.781955] [<c02536d0>] __pci_register_driver+0x40/0x61
[ 22.798179] [<c05d485e>] ? kernel_init+0x0/0x93
[ 22.812119] [<c032fac0>] ahc_linux_pci_init+0x14/0x16
[ 22.827565] [<c05ef704>] ahc_linux_init+0x58/0x5c
[ 22.841982] [<c05d4737>] do_initcalls+0x59/0x131
[ 22.856142] [<c01af795>] ? create_proc_entry+0x67/0x7b
[ 22.871900] [<c0159447>] ? register_irq_proc+0xa4/0xba
[ 22.887656] [<c01a0000>] ? bio_copy_kern_endio+0x6c/0xa1
[ 22.903935] [<c05d485e>] ? kernel_init+0x0/0x93
[ 22.917872] [<c05d482b>] do_basic_setup+0x1c/0x1e
[ 22.932275] [<c05d48b6>] kernel_init+0x58/0x93
[ 22.945902] [<c010463f>] kernel_thread_helper+0x7/0x10
[ 22.961606] =======================
[ 22.973591] ---[ end trace 441bd607bde65755 ]---
--
regards,
Dhaval
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC][PATCH] sched_clock_cpu()
2008-05-03 16:29 [RFC][PATCH] sched_clock_cpu() Peter Zijlstra
` (2 preceding siblings ...)
2008-05-03 16:55 ` Ingo Molnar
@ 2008-05-03 17:01 ` Ingo Molnar
2008-05-04 4:26 ` David Miller
3 siblings, 1 reply; 18+ messages in thread
From: Ingo Molnar @ 2008-05-03 17:01 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Thomas Gleixner, David Miller, Guillaume Chazarain, Andi Kleen,
linux-kernel, linux-arch, Mike Galbraith, Dhaval Giani
* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> it _DOESN'T_ boot ;-/ and I seem to have caught a flu that makes my
> whole body hurt like hell, so I'm not getting anything done.
i think i see where your boot problem comes from:
> +struct sched_clock_data {
> + spinlock_t lock;
that wont work very well when sched_clock() is called from within
CONFIG_LOCK_STAT instrumentation. Does the patch below solve the boot
problems for you?
Ingo
-------------------->
Subject: sched: sched_clock() fix
From: Ingo Molnar <mingo@elte.hu>
Date: Sat May 03 18:41:11 CEST 2008
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
kernel/sched.c | 2 --
kernel/sched_clock.c | 42 ++++++++++++++++++++++++------------------
2 files changed, 24 insertions(+), 20 deletions(-)
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -1074,8 +1074,6 @@ static struct rq *this_rq_lock(void)
return rq;
}
- WARN_ON(!irqs_disabled());
- WARN_ON(!irqs_disabled());
static void __resched_task(struct task_struct *p, int tif_bit);
static inline void resched_task(struct task_struct *p)
Index: linux/kernel/sched_clock.c
===================================================================
--- linux.orig/kernel/sched_clock.c
+++ linux/kernel/sched_clock.c
@@ -33,12 +33,18 @@
#ifndef CONFIG_HAVE_STABLE_CLOCK
struct sched_clock_data {
- spinlock_t lock;
- unsigned long prev_jiffies;
- u64 prev_raw;
- u64 tick_raw;
- u64 tick_gtod;
- u64 clock;
+ /*
+ * Raw spinlock - this is a special case: this might be called
+ * from within instrumentation code so we dont want to do any
+ * instrumentation ourselves.
+ */
+ raw_spinlock_t lock;
+
+ unsigned long prev_jiffies;
+ u64 prev_raw;
+ u64 tick_raw;
+ u64 tick_gtod;
+ u64 clock;
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
@@ -62,7 +68,7 @@ void sched_clock_init(void)
for_each_possible_cpu(cpu) {
struct sched_clock_data *scd = cpu_sdc(cpu);
- spin_lock_init(&scd->lock);
+ scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
scd->prev_jiffies = jiffies;
scd->prev_raw = now;
scd->tick_raw = now;
@@ -116,11 +122,11 @@ static void lock_double_clock(struct sch
struct sched_clock_data *data2)
{
if (data1 < data2) {
- spin_lock(&data1->lock);
- spin_lock_nested(&data2->lock, SINGLE_DEPTH_NESTING);
+ __raw_spin_lock(&data1->lock);
+ __raw_spin_lock(&data2->lock);
} else {
- spin_lock(&data2->lock);
- spin_lock_nested(&data1->lock, SINGLE_DEPTH_NESTING);
+ __raw_spin_lock(&data2->lock);
+ __raw_spin_lock(&data1->lock);
}
}
@@ -147,14 +153,14 @@ u64 sched_clock_cpu(int cpu)
now -= my_scd->tick_gtod;
now += scd->tick_gtod;
- spin_unlock(&my_scd->lock);
+ __raw_spin_unlock(&my_scd->lock);
} else
- spin_lock(&scd->lock);
+ __raw_spin_lock(&scd->lock);
__update_sched_clock(scd, now);
clock = scd->clock;
- spin_unlock(&scd->lock);
+ __raw_spin_unlock(&scd->lock);
return clock;
}
@@ -164,7 +170,7 @@ void sched_clock_tick(void)
struct sched_clock_data *scd = this_scd();
u64 now;
- spin_lock(&scd->lock);
+ __raw_spin_lock(&scd->lock);
now = sched_clock();
__update_sched_clock(scd, now);
/*
@@ -174,7 +180,7 @@ void sched_clock_tick(void)
*/
scd->tick_raw = now;
scd->tick_gtod = ktime_to_ns(ktime_get()); // XXX get from regular tick
- spin_unlock(&scd->lock);
+ __raw_spin_unlock(&scd->lock);
}
/*
@@ -199,10 +205,10 @@ void sched_clock_idle_wakeup_event(u64 d
* and use the PM-provided delta_ns to advance the
* rq clock:
*/
- spin_lock(&scd->lock);
+ __raw_spin_lock(&scd->lock);
scd->prev_raw = sched_clock();
scd->clock += delta_ns;
- spin_unlock(&scd->lock);
+ __raw_spin_unlock(&scd->lock);
touch_softlockup_watchdog();
}
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [RFC][PATCH] sched_clock_cpu()
2008-05-03 17:01 ` Ingo Molnar
@ 2008-05-04 4:26 ` David Miller
2008-05-04 5:09 ` Dhaval Giani
` (2 more replies)
0 siblings, 3 replies; 18+ messages in thread
From: David Miller @ 2008-05-04 4:26 UTC (permalink / raw)
To: mingo
Cc: a.p.zijlstra, tglx, guichaz, andi, linux-kernel, linux-arch,
efault, dhaval
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 3 May 2008 19:01:30 +0200
>
> * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>
> > it _DOESN'T_ boot ;-/ and I seem to have caught a flu that makes my
> > whole body hurt like hell, so I'm not getting anything done.
>
> i think i see where your boot problem comes from:
>
> > +struct sched_clock_data {
> > + spinlock_t lock;
>
> that wont work very well when sched_clock() is called from within
> CONFIG_LOCK_STAT instrumentation. Does the patch below solve the boot
> problems for you?
Also, no platform can set HAVE_STABLE_CLOCK until we instantiate it in
a Kconfig somewhere. I've chosen to do it in kernel/Kconfig.hz and
here are the sparc/sparc64 bits as well, I've booted this up with
Peter's patch on my 64-cpu niagara2 box and done some basic testing.
It would be nice if a powerpc person could test the trivial
powerpc Kconfig patch.
Possibly this should be HAVE_UNSTABLE_CLOCK, then only one platform
needs to set it :-)
sparc: Instantiate and set HAVE_STABLE_CLOCK
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index d211fdb..c60f5d4 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -69,6 +69,7 @@ config SPARC
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_ARCH_KGDB if !SMP
+ select HAVE_STABLE_CLOCK
# Identify this as a Sparc32 build
config SPARC32
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index eb36f3b..711d4b1 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -14,6 +14,7 @@ config SPARC64
select HAVE_IDE
select HAVE_LMB
select HAVE_ARCH_KGDB
+ select HAVE_STABLE_CLOCK
config GENERIC_TIME
bool
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 526128a..b88c82a 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -56,3 +56,6 @@ config HZ
config SCHED_HRTICK
def_bool HIGH_RES_TIMERS && X86
+
+config HAVE_STABLE_CLOCK
+ boolean
^ permalink raw reply related [flat|nested] 18+ messages in thread* Re: [RFC][PATCH] sched_clock_cpu()
2008-05-04 4:26 ` David Miller
@ 2008-05-04 5:09 ` Dhaval Giani
2008-05-04 9:00 ` Ingo Molnar
2008-05-05 21:47 ` Tony Breeds
2 siblings, 0 replies; 18+ messages in thread
From: Dhaval Giani @ 2008-05-04 5:09 UTC (permalink / raw)
To: David Miller
Cc: mingo, a.p.zijlstra, tglx, guichaz, andi, linux-kernel,
linux-arch, efault
On Sat, May 03, 2008 at 09:26:07PM -0700, David Miller wrote:
> From: Ingo Molnar <mingo@elte.hu>
> Date: Sat, 3 May 2008 19:01:30 +0200
>
> >
> > * Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> >
> > > it _DOESN'T_ boot ;-/ and I seem to have caught a flu that makes my
> > > whole body hurt like hell, so I'm not getting anything done.
> >
> > i think i see where your boot problem comes from:
> >
> > > +struct sched_clock_data {
> > > + spinlock_t lock;
> >
> > that wont work very well when sched_clock() is called from within
> > CONFIG_LOCK_STAT instrumentation. Does the patch below solve the boot
> > problems for you?
>
> Also, no platform can set HAVE_STABLE_CLOCK until we instantiate it in
> a Kconfig somewhere. I've chosen to do it in kernel/Kconfig.hz and
> here are the sparc/sparc64 bits as well, I've booted this up with
> Peter's patch on my 64-cpu niagara2 box and done some basic testing.
>
> It would be nice if a powerpc person could test the trivial
> powerpc Kconfig patch.
>
On it. Let me get hold of a powerpc box here, and try it out.
--
regards,
Dhaval
^ permalink raw reply [flat|nested] 18+ messages in thread* Re: [RFC][PATCH] sched_clock_cpu()
2008-05-04 4:26 ` David Miller
2008-05-04 5:09 ` Dhaval Giani
@ 2008-05-04 9:00 ` Ingo Molnar
2008-05-05 11:32 ` Ingo Molnar
2008-05-05 21:47 ` Tony Breeds
2 siblings, 1 reply; 18+ messages in thread
From: Ingo Molnar @ 2008-05-04 9:00 UTC (permalink / raw)
To: David Miller
Cc: a.p.zijlstra, tglx, guichaz, andi, linux-kernel, linux-arch,
efault, dhaval
* David Miller <davem@davemloft.net> wrote:
> > that wont work very well when sched_clock() is called from within
> > CONFIG_LOCK_STAT instrumentation. Does the patch below solve the
> > boot problems for you?
>
> Also, no platform can set HAVE_STABLE_CLOCK until we instantiate it in
> a Kconfig somewhere. I've chosen to do it in kernel/Kconfig.hz and
> here are the sparc/sparc64 bits as well, I've booted this up with
> Peter's patch on my 64-cpu niagara2 box and done some basic testing.
applied, thanks David.
right now this topic looks good in review and in testing but it is
stalled on a bug: in overnight testing it triggered an ftrace self-test
hang that i bisected down to that patch. While that doesnt affect
mainline it's something that shows that the new sched_clock() code is
not as widely usable as the old code - have to investigate that some
more.
> It would be nice if a powerpc person could test the trivial powerpc
> Kconfig patch.
>
> Possibly this should be HAVE_UNSTABLE_CLOCK, then only one platform
> needs to set it :-)
heh, indeed :)
Initially i thought that it's better to first be safe, but this really
will only affect x86 in practice, so ... i think we'll switch around the
flag, turning this into a no-effort thing for everything but x86.
Ingo
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC][PATCH] sched_clock_cpu()
2008-05-04 9:00 ` Ingo Molnar
@ 2008-05-05 11:32 ` Ingo Molnar
0 siblings, 0 replies; 18+ messages in thread
From: Ingo Molnar @ 2008-05-05 11:32 UTC (permalink / raw)
To: David Miller
Cc: a.p.zijlstra, tglx, guichaz, andi, linux-kernel, linux-arch,
efault, dhaval
* Ingo Molnar <mingo@elte.hu> wrote:
>
> * David Miller <davem@davemloft.net> wrote:
>
> > > that wont work very well when sched_clock() is called from within
> > > CONFIG_LOCK_STAT instrumentation. Does the patch below solve the
> > > boot problems for you?
> >
> > Also, no platform can set HAVE_STABLE_CLOCK until we instantiate it in
> > a Kconfig somewhere. I've chosen to do it in kernel/Kconfig.hz and
> > here are the sparc/sparc64 bits as well, I've booted this up with
> > Peter's patch on my 64-cpu niagara2 box and done some basic testing.
>
> applied, thanks David.
>
> right now this topic looks good in review and in testing but it is
> stalled on a bug: in overnight testing it triggered an ftrace
> self-test hang that i bisected down to that patch. While that doesnt
> affect mainline it's something that shows that the new sched_clock()
> code is not as widely usable as the old code - have to investigate
> that some more.
FYI, figured that out today and fixed it, so the topic has green lights
again.
Ingo
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [RFC][PATCH] sched_clock_cpu()
2008-05-04 4:26 ` David Miller
2008-05-04 5:09 ` Dhaval Giani
2008-05-04 9:00 ` Ingo Molnar
@ 2008-05-05 21:47 ` Tony Breeds
2008-05-05 21:47 ` Tony Breeds
2 siblings, 1 reply; 18+ messages in thread
From: Tony Breeds @ 2008-05-05 21:47 UTC (permalink / raw)
To: David Miller
Cc: mingo, a.p.zijlstra, tglx, guichaz, andi, linux-kernel,
linux-arch, efault, dhaval
On Sat, May 03, 2008 at 09:26:07PM -0700, David Miller wrote:
> It would be nice if a powerpc person could test the trivial
> powerpc Kconfig patch.
FWIW I tested the following trivial patch for powerpc. While testing
this I discovered a (related) ppc32 regression that I'm trying to
bisect/debug.
Signed-off-by: Tony Breeds <tony@bakeyournoodle.com>
---
Obviously on top of the merged patch + Dave's sparc patch
arch/powerpc/Kconfig | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3934e26..0642dff 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -110,6 +110,7 @@ config PPC
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_LMB
+ select HAVE_STABLE_CLOCK
config EARLY_PRINTK
bool
Yours Tony
linux.conf.au http://www.marchsouth.org/
Jan 19 - 24 2009 The Australian Linux Technical Conference!
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [RFC][PATCH] sched_clock_cpu()
2008-05-05 21:47 ` Tony Breeds
@ 2008-05-05 21:47 ` Tony Breeds
0 siblings, 0 replies; 18+ messages in thread
From: Tony Breeds @ 2008-05-05 21:47 UTC (permalink / raw)
To: David Miller
Cc: mingo, a.p.zijlstra, tglx, guichaz, andi, linux-kernel,
linux-arch, efault, dhaval
On Sat, May 03, 2008 at 09:26:07PM -0700, David Miller wrote:
> It would be nice if a powerpc person could test the trivial
> powerpc Kconfig patch.
FWIW I tested the following trivial patch for powerpc. While testing
this I discovered a (related) ppc32 regression that I'm trying to
bisect/debug.
Signed-off-by: Tony Breeds <tony@bakeyournoodle.com>
---
Obviously on top of the merged patch + Dave's sparc patch
arch/powerpc/Kconfig | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3934e26..0642dff 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -110,6 +110,7 @@ config PPC
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_LMB
+ select HAVE_STABLE_CLOCK
config EARLY_PRINTK
bool
Yours Tony
linux.conf.au http://www.marchsouth.org/
Jan 19 - 24 2009 The Australian Linux Technical Conference!
^ permalink raw reply related [flat|nested] 18+ messages in thread
end of thread, other threads:[~2008-05-05 22:18 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-05-03 16:29 [RFC][PATCH] sched_clock_cpu() Peter Zijlstra
2008-05-03 16:29 ` Peter Zijlstra
2008-05-03 16:37 ` Ingo Molnar
2008-05-03 16:37 ` Ingo Molnar
2008-05-03 16:55 ` Ingo Molnar
2008-05-03 17:30 ` Dhaval Giani
2008-05-03 17:30 ` Dhaval Giani
2008-05-04 4:16 ` David Miller
2008-05-04 4:30 ` Dhaval Giani
2008-05-04 4:38 ` Dhaval Giani
2008-05-04 4:38 ` Dhaval Giani
2008-05-03 17:01 ` Ingo Molnar
2008-05-04 4:26 ` David Miller
2008-05-04 5:09 ` Dhaval Giani
2008-05-04 9:00 ` Ingo Molnar
2008-05-05 11:32 ` Ingo Molnar
2008-05-05 21:47 ` Tony Breeds
2008-05-05 21:47 ` Tony Breeds
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox