public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Russell King <rmk+lkml@arm.linux.org.uk>
To: Nicolas Pitre <nico@cam.org>, Ingo Molnar <mingo@elte.hu>
Cc: Andi Kleen <ak@suse.de>, linux-kernel@vger.kernel.org
Subject: Re: sched_clock() uses are broken
Date: Tue, 2 May 2006 22:35:47 +0100	[thread overview]
Message-ID: <20060502213547.GA19804@flint.arm.linux.org.uk> (raw)
In-Reply-To: <Pine.LNX.4.64.0605021521390.28543@localhost.localdomain>

On Tue, May 02, 2006 at 03:23:01PM -0400, Nicolas Pitre wrote:
> On Tue, 2 May 2006, Russell King wrote:
> 
> > On Tue, May 02, 2006 at 03:05:22PM -0400, Nicolas Pitre wrote:
> > > If we're discussing the addition of a sched_clock_diff(), why whouldn't 
> > > shed_clock() return anything it wants in that context?  It could be 
> > > redefined to have a return value meaningful only to shed_clock_diff()?
> > 
> > If we're talking about doing that, we need to replace sched_clock()
> > to ensure that we all users are aware that it has changed.
> > 
> > I did think about that for my original fix proposal, but stepped back
> > because that's a bigger change - and is something for post-2.6.17.
> > The smallest fix (suitable for -rc kernels) is as I detailed.
> 
> Oh agreed.

(Added Ingo - don't know if you saw my original message.)

Actually, I'm not entirely convinced that the smallest fix is going to
be the best solution - it looks too complex.

Note the code change in update_cpu_clock and sched_current_time - arguably
both are buggy as they stand today when the values wrap.

Comments anyone?

diff --git a/include/linux/sched.h b/include/linux/sched.h
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -988,6 +988,11 @@ static inline int set_cpus_allowed(task_
 extern unsigned long long sched_clock(void);
 extern unsigned long long current_sched_time(const task_t *current_task);
 
+static inline unsigned long long sched_clock_diff(unsigned long long t1, unsigned long long t0)
+{
+	return t1 - t0;
+}
+
 /* sched_exec is called by processes performing an exec */
 #ifdef CONFIG_SMP
 extern void sched_exec(void);
diff --git a/kernel/sched.c b/kernel/sched.c
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -731,7 +731,7 @@ static inline void __activate_idle_task(
 static int recalc_task_prio(task_t *p, unsigned long long now)
 {
 	/* Caller must always ensure 'now >= p->timestamp' */
-	unsigned long long __sleep_time = now - p->timestamp;
+	unsigned long long __sleep_time = sched_clock_diff(now, p->timestamp);
 	unsigned long sleep_time;
 
 	if (batch_task(p))
@@ -807,7 +807,7 @@ static void activate_task(task_t *p, run
 	if (!local) {
 		/* Compensate for drifting sched_clock */
 		runqueue_t *this_rq = this_rq();
-		now = (now - this_rq->timestamp_last_tick)
+		now = sched_clock_diff(now, this_rq->timestamp_last_tick)
 			+ rq->timestamp_last_tick;
 	}
 #endif
@@ -2512,8 +2512,10 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 static inline void update_cpu_clock(task_t *p, runqueue_t *rq,
 				    unsigned long long now)
 {
-	unsigned long long last = max(p->timestamp, rq->timestamp_last_tick);
-	p->sched_time += now - last;
+	unsigned long long d1, d2;
+	d1 = sched_clock_diff(now, p->timestamp);
+	d2 = sched_clock_diff(now, rq->timestamp_last_tick);
+	p->sched_time += min(d1, d2);
 }
 
 /*
@@ -2522,11 +2524,13 @@ static inline void update_cpu_clock(task
  */
 unsigned long long current_sched_time(const task_t *tsk)
 {
-	unsigned long long ns;
+	unsigned long long now, d1, d2, ns;
 	unsigned long flags;
 	local_irq_save(flags);
-	ns = max(tsk->timestamp, task_rq(tsk)->timestamp_last_tick);
-	ns = tsk->sched_time + (sched_clock() - ns);
+	now = sched_clock();
+	d1 = sched_clock_diff(now, tsk->timestamp);
+	d2 = sched_clock_diff(now, task_rq(tsk)->timestamp_last_tick);
+	ns = tsk->sched_time + min(d1, d2);
 	local_irq_restore(flags);
 	return ns;
 }
@@ -2925,7 +2929,7 @@ asmlinkage void __sched schedule(void)
 	runqueue_t *rq;
 	prio_array_t *array;
 	struct list_head *queue;
-	unsigned long long now;
+	unsigned long long now, sleep;
 	unsigned long run_time;
 	int cpu, idx, new_prio;
 
@@ -2960,11 +2964,10 @@ need_resched_nonpreemptible:
 
 	schedstat_inc(rq, sched_cnt);
 	now = sched_clock();
-	if (likely((long long)(now - prev->timestamp) < NS_MAX_SLEEP_AVG)) {
-		run_time = now - prev->timestamp;
-		if (unlikely((long long)(now - prev->timestamp) < 0))
-			run_time = 0;
-	} else
+	sleep = sched_clock_diff(now, prev->timestamp);
+	if (likely(sleep < NS_MAX_SLEEP_AVG))
+		run_time = sleep;
+	else
 		run_time = NS_MAX_SLEEP_AVG;
 
 	/*
@@ -3039,8 +3042,8 @@ go_idle:
 	next = list_entry(queue->next, task_t, run_list);
 
 	if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
-		unsigned long long delta = now - next->timestamp;
-		if (unlikely((long long)(now - next->timestamp) < 0))
+		unsigned long long delta = sched_clock_diff(now, next->timestamp);
+		if (unlikely((long long)delta < 0))
 			delta = 0;
 
 		if (next->sleep_type == SLEEP_INTERACTIVE)


-- 
Russell King
 Linux kernel    2.6 ARM Linux   - http://www.arm.linux.org.uk/
 maintainer of:  2.6 Serial core

  reply	other threads:[~2006-05-02 21:35 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-05-02 13:29 sched_clock() uses are broken Russell King
2006-05-02 14:21 ` Nick Piggin
2006-05-02 16:43 ` Andi Kleen
2006-05-02 16:50   ` Russell King
2006-05-02 17:01     ` Andi Kleen
2006-05-02 17:18       ` Nicolas Pitre
2006-05-02 18:55         ` Russell King
2006-05-02 19:05           ` Nicolas Pitre
2006-05-02 19:08             ` Russell King
2006-05-02 19:23               ` Nicolas Pitre
2006-05-02 21:35                 ` Russell King [this message]
2006-05-02 17:15     ` Nicolas Pitre
2006-05-04  3:50       ` George Anzinger
2006-05-04 14:18         ` Nicolas Pitre
2006-05-02 16:54   ` Christopher Friesen
2006-05-02 16:59     ` Andi Kleen
2006-05-02 17:07       ` Nick Piggin
2006-05-03  7:09         ` Mike Galbraith
2006-05-03  7:40           ` Andi Kleen
2006-05-03  9:11             ` Mike Galbraith
2006-05-03  9:16               ` Andi Kleen
2006-05-03  9:31                 ` Mike Galbraith
2006-05-07 12:33           ` Nick Piggin
2006-05-07 12:43             ` Russell King
2006-05-07 12:56               ` Nick Piggin
2006-05-07 13:00                 ` Nick Piggin
2006-05-07 13:18                   ` Russell King
2006-05-07 13:30                     ` Nick Piggin
2006-05-07 13:55                       ` Russell King
2006-05-07 14:04                         ` Nick Piggin
2006-05-07 16:03                           ` Andi Kleen
2006-05-07 16:53                       ` Russell King
2006-05-07 17:52                         ` Mike Galbraith
2006-05-07 17:37               ` Mike Galbraith
2006-05-07 17:32             ` Mike Galbraith
2006-05-08  4:14               ` Mike Galbraith
2006-05-08  4:37                 ` Mike Galbraith
2006-05-08  4:46                   ` Nick Piggin
2006-05-08  5:24                     ` Mike Galbraith
2006-05-08  5:30                       ` Nick Piggin
2006-05-04 20:02 ` Florian Paul Schmidt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060502213547.GA19804@flint.arm.linux.org.uk \
    --to=rmk+lkml@arm.linux.org.uk \
    --cc=ak@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=nico@cam.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox