* [PATCH] Track accurate idle time with tick_sched.idle_sleeptime
@ 2007-08-27 21:56 Venki Pallipadi
2007-08-31 18:22 ` Ingo Molnar
2007-09-01 22:23 ` Andi Kleen
0 siblings, 2 replies; 9+ messages in thread
From: Venki Pallipadi @ 2007-08-27 21:56 UTC (permalink / raw)
To: Thomas Gleixner, Andrew Morton, Ingo Molnar; +Cc: linux-kernel
Current idle time in kstat is based on jiffies and is coarse grained.
tick_sched.idle_sleeptime is making some attempt to keep track of
idle time in a fine grained manner. But, it is not handling
the time spent in interrupts fully.
Make tick_sched.idle_sleeptime accurate with respect to time spent on
handling interrupts and also add tick_sched.idle_lastupdate, which
keeps track of last time when idle_sleeptime was updated.
These statistics will be crucial for the cpufreq-ondemand governor, which can shed
some of the conservative guard band that it uses today while setting the frequency.
The ondemand changes that use the exact idle time are coming soon.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Index: linux-2.6.22/kernel/time/tick-sched.c
===================================================================
--- linux-2.6.22.orig/kernel/time/tick-sched.c
+++ linux-2.6.22/kernel/time/tick-sched.c
@@ -141,6 +141,43 @@ void tick_nohz_update_jiffies(void)
local_irq_restore(flags);
}
+void tick_nohz_stop_idle(int cpu)
+{
+ struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+
+ if (ts->idle_active) {
+ ktime_t now, delta;
+ now = ktime_get();
+ delta = ktime_sub(now, ts->idle_entrytime);
+ ts->idle_lastupdate = now;
+ ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+ ts->idle_active = 0;
+ }
+}
+
+static void tick_nohz_start_idle(int cpu)
+{
+ struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+ ktime_t now, delta;
+
+ now = ktime_get();
+ if (ts->idle_active) {
+ delta = ktime_sub(now, ts->idle_entrytime);
+ ts->idle_lastupdate = now;
+ ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+ }
+ ts->idle_entrytime = now;
+ ts->idle_active = 1;
+}
+
+u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
+{
+ struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+
+ *last_update_time = ktime_to_us(ts->idle_lastupdate);
+ return ktime_to_us(ts->idle_sleeptime);
+}
+
/**
* tick_nohz_stop_sched_tick - stop the idle tick from the idle task
*
@@ -152,13 +189,15 @@ void tick_nohz_stop_sched_tick(void)
{
unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
struct tick_sched *ts;
- ktime_t last_update, expires, now, delta;
+ ktime_t last_update, expires, now;
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
int cpu;
local_irq_save(flags);
cpu = smp_processor_id();
+ tick_nohz_start_idle(cpu);
+
ts = &per_cpu(tick_cpu_sched, cpu);
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
@@ -178,19 +217,7 @@ void tick_nohz_stop_sched_tick(void)
}
}
- now = ktime_get();
- /*
- * When called from irq_exit we need to account the idle sleep time
- * correctly.
- */
- if (ts->tick_stopped) {
- delta = ktime_sub(now, ts->idle_entrytime);
- ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
- }
-
- ts->idle_entrytime = now;
ts->idle_calls++;
-
/* Read jiffies and the time when jiffies were updated last */
do {
seq = read_seqbegin(&xtime_lock);
@@ -320,23 +347,22 @@ void tick_nohz_restart_sched_tick(void)
int cpu = smp_processor_id();
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
unsigned long ticks;
- ktime_t now, delta;
+ ktime_t now;
+
+ local_irq_disable();
+ tick_nohz_stop_idle(cpu);
- if (!ts->tick_stopped)
+ if (!ts->tick_stopped) {
+ local_irq_enable();
return;
+ }
/* Update jiffies first */
- now = ktime_get();
-
- local_irq_disable();
select_nohz_load_balancer(0);
+ now = ktime_get();
tick_do_update_jiffies64(now);
cpu_clear(cpu, nohz_cpu_mask);
- /* Account the idle time */
- delta = ktime_sub(now, ts->idle_entrytime);
- ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
-
/*
* We stopped the tick in idle. Update process times would miss the
* time we slept as update_process_times does only a 1 tick
Index: linux-2.6.22/include/linux/tick.h
===================================================================
--- linux-2.6.22.orig/include/linux/tick.h
+++ linux-2.6.22/include/linux/tick.h
@@ -51,8 +51,10 @@ struct tick_sched {
unsigned long idle_jiffies;
unsigned long idle_calls;
unsigned long idle_sleeps;
+ int idle_active;
ktime_t idle_entrytime;
ktime_t idle_sleeptime;
+ ktime_t idle_lastupdate;
ktime_t sleep_length;
unsigned long last_jiffies;
unsigned long next_jiffies;
@@ -103,6 +105,8 @@ extern void tick_nohz_stop_sched_tick(vo
extern void tick_nohz_restart_sched_tick(void);
extern void tick_nohz_update_jiffies(void);
extern ktime_t tick_nohz_get_sleep_length(void);
+extern void tick_nohz_stop_idle(int cpu);
+extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
# else
static inline void tick_nohz_stop_sched_tick(void) { }
static inline void tick_nohz_restart_sched_tick(void) { }
@@ -113,6 +117,8 @@ static inline ktime_t tick_nohz_get_slee
return len;
}
+static inline void tick_nohz_stop_idle(int cpu) { }
+static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return 0; }
# endif /* !NO_HZ */
#endif
Index: linux-2.6.22/kernel/softirq.c
===================================================================
--- linux-2.6.22.orig/kernel/softirq.c
+++ linux-2.6.22/kernel/softirq.c
@@ -280,9 +280,14 @@ EXPORT_SYMBOL(do_softirq);
*/
void irq_enter(void)
{
+#ifdef CONFIG_NO_HZ
+ int cpu = smp_processor_id();
+ if (idle_cpu(cpu) && !in_interrupt())
+ tick_nohz_stop_idle(cpu);
+#endif
__irq_enter();
#ifdef CONFIG_NO_HZ
- if (idle_cpu(smp_processor_id()))
+ if (idle_cpu(cpu))
tick_nohz_update_jiffies();
#endif
}
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Track accurate idle time with tick_sched.idle_sleeptime
2007-08-27 21:56 [PATCH] Track accurate idle time with tick_sched.idle_sleeptime Venki Pallipadi
@ 2007-08-31 18:22 ` Ingo Molnar
2007-08-31 18:24 ` Ingo Molnar
2007-09-01 22:23 ` Andi Kleen
1 sibling, 1 reply; 9+ messages in thread
From: Ingo Molnar @ 2007-08-31 18:22 UTC (permalink / raw)
To: Venki Pallipadi; +Cc: Thomas Gleixner, Andrew Morton, linux-kernel
* Venki Pallipadi <venkatesh.pallipadi@intel.com> wrote:
> Current idle time in kstat is based on jiffies and is coarse grained.
> tick_sched.idle_sleeptime is making some attempt to keep track of idle
> time in a fine grained manner. But, it is not handling the time spent
> in interrupts fully.
>
> Make tick_sched.idle_sleeptime accurate with respect to time spent on
> handling interrupts and also add tick_sched.idle_lastupdate, which
> keeps track of last time when idle_sleeptime was updated.
>
> This statistics will be crucial for cpufreq-ondemand governor, which
> can shed some conservative gaurd band that is uses today while setting
> the frequency. The ondemand changes that uses the exact idle time is
> coming soon.
nice stuff! I've added your patch to the scheduler queue - but this will
probably be 2.6.24 material.
Acked-by: Ingo Molnar <mingo@elte.hu>
Ingo
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Track accurate idle time with tick_sched.idle_sleeptime
2007-08-31 18:22 ` Ingo Molnar
@ 2007-08-31 18:24 ` Ingo Molnar
2007-08-31 18:26 ` Pallipadi, Venkatesh
0 siblings, 1 reply; 9+ messages in thread
From: Ingo Molnar @ 2007-08-31 18:24 UTC (permalink / raw)
To: Venki Pallipadi; +Cc: Thomas Gleixner, Andrew Morton, linux-kernel
* Ingo Molnar <mingo@elte.hu> wrote:
> nice stuff! I've added your patch to the scheduler queue - but this
> will probably be 2.6.24 material.
btw., this patch depends on the latest acpi tree, right?
Ingo
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH] Track accurate idle time with tick_sched.idle_sleeptime
2007-08-31 18:24 ` Ingo Molnar
@ 2007-08-31 18:26 ` Pallipadi, Venkatesh
0 siblings, 0 replies; 9+ messages in thread
From: Pallipadi, Venkatesh @ 2007-08-31 18:26 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Thomas Gleixner, Andrew Morton, linux-kernel
>-----Original Message-----
>From: Ingo Molnar [mailto:mingo@elte.hu]
>Sent: Friday, August 31, 2007 11:24 AM
>To: Pallipadi, Venkatesh
>Cc: Thomas Gleixner; Andrew Morton; linux-kernel
>Subject: Re: [PATCH] Track accurate idle time with
>tick_sched.idle_sleeptime
>
>
>* Ingo Molnar <mingo@elte.hu> wrote:
>
>> nice stuff! I've added your patch to the scheduler queue - but this
>> will probably be 2.6.24 material.
>
>btw., this patch depends on the latest acpi tree, right?
>
> Ingo
No. There are no dependencies on any ACPI changes that I know of. I
tested this patch with both the latest git and mm, and it worked fine both
ways.
Thanks,
Venki
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Track accurate idle time with tick_sched.idle_sleeptime
2007-08-27 21:56 [PATCH] Track accurate idle time with tick_sched.idle_sleeptime Venki Pallipadi
2007-08-31 18:22 ` Ingo Molnar
@ 2007-09-01 22:23 ` Andi Kleen
2007-09-02 7:28 ` Ingo Molnar
1 sibling, 1 reply; 9+ messages in thread
From: Andi Kleen @ 2007-09-01 22:23 UTC (permalink / raw)
To: Venki Pallipadi; +Cc: Thomas Gleixner, Andrew Morton, Ingo Molnar, linux-kernel
Venki Pallipadi <venkatesh.pallipadi@intel.com> writes:
>
> +void tick_nohz_stop_idle(int cpu)
> +{
> + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
> +
> + if (ts->idle_active) {
> + ktime_t now, delta;
> + now = ktime_get();
That could be a PM timer read costing thousands of cycles.
For going into idle it's probably not a real problem, but it could
seriously affect out of idle latencies.
You would really need a cheaper per CPU timer that doesn't
stop in idle (which rules out sched_clock). It's all unfortunately
quite messy.
-Andi
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Track accurate idle time with tick_sched.idle_sleeptime
2007-09-01 22:23 ` Andi Kleen
@ 2007-09-02 7:28 ` Ingo Molnar
2007-09-02 7:46 ` Andi Kleen
0 siblings, 1 reply; 9+ messages in thread
From: Ingo Molnar @ 2007-09-02 7:28 UTC (permalink / raw)
To: Andi Kleen; +Cc: Venki Pallipadi, Thomas Gleixner, Andrew Morton, linux-kernel
* Andi Kleen <andi@firstfloor.org> wrote:
> Venki Pallipadi <venkatesh.pallipadi@intel.com> writes:
> >
> > +void tick_nohz_stop_idle(int cpu)
> > +{
> > + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
> > +
> > + if (ts->idle_active) {
> > + ktime_t now, delta;
> > + now = ktime_get();
>
> That could be PM timer read costing thousands of cycles.
>
> For going into idle it's probably not a real problem, but it could
> seriously affect out of idle latencies.
at least the current out-of-idle code already does what amounts to a
PM-timer read when exiting from C2 or C3 mode. The
sched_clock_idle_sleep_event() and sched_clock_idle_wakeup_event()
callbacks utilize this already existing time information.
> You would really need a cheaper per CPU timer that doesn't stop in
> idle (which rules out sched_clock). It's all unfortunately quite
> messy.
yep.
Ingo
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Track accurate idle time with tick_sched.idle_sleeptime
2007-09-02 7:28 ` Ingo Molnar
@ 2007-09-02 7:46 ` Andi Kleen
2007-09-02 10:02 ` Ingo Molnar
0 siblings, 1 reply; 9+ messages in thread
From: Andi Kleen @ 2007-09-02 7:46 UTC (permalink / raw)
To: Ingo Molnar
Cc: Andi Kleen, Venki Pallipadi, Thomas Gleixner, Andrew Morton,
linux-kernel
> at least the current out-of-idle code already does what amounts to a
> PM-timer read when exiting from C2 or C3 mode. The
C2/C3 are already slow. I worry more about C1.
Also doing two instead of one can have an impact even on C2+, especially
when the operation can be thousands of cycles.
-Andi
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] Track accurate idle time with tick_sched.idle_sleeptime
2007-09-02 7:46 ` Andi Kleen
@ 2007-09-02 10:02 ` Ingo Molnar
2007-09-10 18:59 ` Pallipadi, Venkatesh
0 siblings, 1 reply; 9+ messages in thread
From: Ingo Molnar @ 2007-09-02 10:02 UTC (permalink / raw)
To: Andi Kleen; +Cc: Venki Pallipadi, Thomas Gleixner, Andrew Morton, linux-kernel
* Andi Kleen <andi@firstfloor.org> wrote:
> > at least the current out-of-idle code already does what amounts to a
> > PM-timer read when exiting from C2 or C3 mode. The
>
> C2/C3 are already slow. I more worry about C1.
C2/C3 are only slow in older CPUs - and they are getting faster and
faster. (also, newer systems do less of C1, due to increased energy
awareness.)
would be nice to measure the overhead/impact i suspect.
Ingo
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH] Track accurate idle time with tick_sched.idle_sleeptime
2007-09-02 10:02 ` Ingo Molnar
@ 2007-09-10 18:59 ` Pallipadi, Venkatesh
0 siblings, 0 replies; 9+ messages in thread
From: Pallipadi, Venkatesh @ 2007-09-10 18:59 UTC (permalink / raw)
To: Ingo Molnar, Andi Kleen; +Cc: Thomas Gleixner, Andrew Morton, linux-kernel
>-----Original Message-----
>From: Ingo Molnar [mailto:mingo@elte.hu]
>Sent: Sunday, September 02, 2007 3:02 AM
>To: Andi Kleen
>Cc: Pallipadi, Venkatesh; Thomas Gleixner; Andrew Morton; linux-kernel
>Subject: Re: [PATCH] Track accurate idle time with
>tick_sched.idle_sleeptime
>
>
>* Andi Kleen <andi@firstfloor.org> wrote:
>
>> > at least the current out-of-idle code already does what
>amounts to a
>> > PM-timer read when exiting from C2 or C3 mode. The
>>
>> C2/C3 are already slow. I more worry about C1.
>
>C2/C3 are only slow in older CPUs - and they are getting faster and
>faster. (also, newer systems do less of C1, due to increased energy
>awareness.)
>
>would be nice to measure the overhead/impact i suspect.
>
I agree with the latency concern out of C1. One option I was looking at was to
delay the timer update and do something like this:
- C-state idle
- Break out due to interrupt
- Record TSC
- Handle interrupt
- irq_exit do a delayed update to time
- Look at current TSC and previously recorded TSC and current time and
get the actual idle time
This way, we will not have latency for interrupt handling itself. But,
it is not so clean to do this as there are old systems where TSCs vary
with frequency and it is not a generic solution like ktime_get().
I had tried lmbench before sending out the patch and did not see
increased latency in scheduler or pipe related numbers. Any other
microbenchmark where I can possibly quantify the latency?
Also, the current patch does the accounting only for the idle process. But,
going forward, we may need similar accounting even when we are executing
a process, so that the scheduler can do fine-grained time slice
management and not account the interrupt handling time to the process.
We will have a similar issue at a larger scale then.
Thanks,
Venki
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2007-09-10 19:01 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-08-27 21:56 [PATCH] Track accurate idle time with tick_sched.idle_sleeptime Venki Pallipadi
2007-08-31 18:22 ` Ingo Molnar
2007-08-31 18:24 ` Ingo Molnar
2007-08-31 18:26 ` Pallipadi, Venkatesh
2007-09-01 22:23 ` Andi Kleen
2007-09-02 7:28 ` Ingo Molnar
2007-09-02 7:46 ` Andi Kleen
2007-09-02 10:02 ` Ingo Molnar
2007-09-10 18:59 ` Pallipadi, Venkatesh
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox