From: Thomas Gleixner <tglx@linutronix.de>
To: Andrew Morton <akpm@osdl.org>
Cc: LKML <linux-kernel@vger.kernel.org>, Ingo Molnar <mingo@elte.hu>,
Jim Gettys <jg@laptop.org>, John Stultz <johnstul@us.ibm.com>,
David Woodhouse <dwmw2@infradead.org>,
Arjan van de Ven <arjan@infradead.org>,
Dave Jones <davej@redhat.com>
Subject: [patch 17/21] dynticks: core
Date: Sun, 01 Oct 2006 23:01:04 -0000 [thread overview]
Message-ID: <20061001225724.985115000@cruncher.tec.linutronix.de> (raw)
In-Reply-To: 20061001225720.115967000@cruncher.tec.linutronix.de
[-- Attachment #1: hrtimer-no-idle-hz.patch --]
[-- Type: text/plain, Size: 12882 bytes --]
From: Ingo Molnar <mingo@elte.hu>
dynamic ticks core code.
This is an extension to the per-cpu sched_tick timer of the high
resolution timer functionality. The sched_tick timer is reprogrammed
to a longer timeout before going idle, when no timer events are due in
the next tick. The periodic tick is resumed when the CPU leaves the
idle state. If a non-timer IRQ hits the idle task jiffies are updated
from irq_enter before calling the interrupt code, otherwise the interrupt
handler would eventually deal with a stale jiffy value.
The per-cpu idle statistics information can be used to optimize power
management decisions.
More detailed information is available in Documentation/hrtimer/highres.txt
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--
include/linux/hrtimer.h | 32 ++++++
kernel/hrtimer.c | 221 ++++++++++++++++++++++++++++++++++++++++++++++++
kernel/softirq.c | 11 ++
kernel/time/Kconfig | 8 +
kernel/timer.c | 2
5 files changed, 273 insertions(+), 1 deletion(-)
Index: linux-2.6.18-mm2/include/linux/hrtimer.h
===================================================================
--- linux-2.6.18-mm2.orig/include/linux/hrtimer.h 2006-10-02 00:55:54.000000000 +0200
+++ linux-2.6.18-mm2/include/linux/hrtimer.h 2006-10-02 00:55:54.000000000 +0200
@@ -22,6 +22,7 @@
#include <linux/list.h>
#include <linux/wait.h>
+struct seq_file;
struct hrtimer_clock_base;
struct hrtimer_cpu_base;
@@ -180,6 +181,16 @@ struct hrtimer_clock_base {
* resolution mode
* @sched_regs: Temporary storage for pt_regs for the sched_timer
* callback
+ * @nr_events: Total number of timer interrupt events
+ * @idle_tick: Store the last idle tick expiry time when the tick
+ * timer is modified for idle sleeps. This is necessary
+ * to resume the tick timer operation in the timeline
+ * when the CPU returns from idle
+ * @tick_stopped: Indicator that the idle tick has been stopped
+ * @idle_calls: Total number of idle calls
+ * @idle_sleeps: Number of idle calls, where the sched tick was stopped
+ * @idle_entrytime: Time when the idle call was entered
+ * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped
*/
struct hrtimer_cpu_base {
spinlock_t lock;
@@ -192,6 +203,15 @@ struct hrtimer_cpu_base {
struct list_head cb_pending;
struct hrtimer sched_timer;
struct pt_regs *sched_regs;
+ unsigned long nr_events;
+#endif
+#ifdef CONFIG_NO_HZ
+ ktime_t idle_tick;
+ int tick_stopped;
+ unsigned long idle_calls;
+ unsigned long idle_sleeps;
+ ktime_t idle_entrytime;
+ ktime_t idle_sleeptime;
#endif
};
@@ -298,6 +318,18 @@ extern void hrtimer_run_queues(void);
/* Resume notification */
void hrtimer_notify_resume(void);
+#ifdef CONFIG_NO_HZ
+extern void hrtimer_stop_sched_tick(void);
+extern void hrtimer_restart_sched_tick(void);
+extern void hrtimer_update_jiffies(void);
+extern void show_no_hz_stats(struct seq_file *p);
+#else
+static inline void hrtimer_stop_sched_tick(void) { }
+static inline void hrtimer_restart_sched_tick(void) { }
+static inline void hrtimer_update_jiffies(void) { }
+static inline void show_no_hz_stats(struct seq_file *p) { }
+#endif
+
/* Bootup initialization: */
extern void __init hrtimers_init(void);
Index: linux-2.6.18-mm2/kernel/hrtimer.c
===================================================================
--- linux-2.6.18-mm2.orig/kernel/hrtimer.c 2006-10-02 00:55:54.000000000 +0200
+++ linux-2.6.18-mm2/kernel/hrtimer.c 2006-10-02 00:55:54.000000000 +0200
@@ -486,6 +486,221 @@ static void update_jiffies64(ktime_t now
write_sequnlock(&xtime_lock);
}
+#ifdef CONFIG_NO_HZ
+/**
+ * hrtimer_update_jiffies - update jiffies when idle was interrupted
+ *
+ * Called from interrupt entry when the CPU was idle
+ *
+ * In case the sched_tick was stopped on this CPU, we have to check if jiffies
+ * must be updated. Otherwise an interrupt handler could use a stale jiffy
+ * value.
+ */
+void hrtimer_update_jiffies(void)
+{
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+ unsigned long flags;
+ ktime_t now;
+
+ if (!cpu_base->tick_stopped || !cpu_base->hres_active)
+ return;
+
+ now = ktime_get();
+
+ local_irq_save(flags);
+ update_jiffies64(now);
+ local_irq_restore(flags);
+}
+
+/**
+ * hrtimer_stop_sched_tick - stop the idle tick from the idle task
+ *
+ * When the next event is more than a tick into the future, stop the idle tick
+ * Called either from the idle loop or from irq_exit() when a idle period was
+ * just interrupted by a interrupt which did not cause a reschedule.
+ */
+void hrtimer_stop_sched_tick(void)
+{
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+ unsigned long seq, last_jiffies, next_jiffies;
+ ktime_t last_update, expires, now;
+ unsigned long delta_jiffies;
+ unsigned long flags;
+
+ if (unlikely(!cpu_base->hres_active))
+ return;
+
+ local_irq_save(flags);
+
+ now = ktime_get();
+ /*
+ * When called from irq_exit we need to account the idle sleep time
+ * correctly.
+ */
+ if (cpu_base->tick_stopped) {
+ ktime_t delta = ktime_sub(now, cpu_base->idle_entrytime);
+
+ cpu_base->idle_sleeptime = ktime_add(cpu_base->idle_sleeptime,
+ delta);
+ }
+ cpu_base->idle_entrytime = now;
+ cpu_base->idle_calls++;
+
+ /* Read jiffies and the time when jiffies were updated last */
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ last_update = last_jiffies_update;
+ last_jiffies = jiffies;
+ } while (read_seqretry(&xtime_lock, seq));
+
+ /* Get the next timer wheel timer */
+ next_jiffies = get_next_timer_interrupt(last_jiffies);
+ delta_jiffies = next_jiffies - last_jiffies;
+
+ if ((long)delta_jiffies >= 1) {
+ /*
+ * hrtimer_stop_sched_tick can be called several times before
+ * the hrtimer_restart_sched_tick is called. This happens when
+ * interrupts arrive which do not cause a reschedule. In the
+ * first call we save the current tick time, so we can restart
+ * the scheduler tick in hrtimer_restart_sched_tick.
+ */
+ if (!cpu_base->tick_stopped) {
+ cpu_base->idle_tick = cpu_base->sched_timer.expires;
+ cpu_base->tick_stopped = 1;
+ }
+ /* calculate the expiry time for the next timer wheel timer */
+ expires = ktime_add_ns(last_update,
+ nsec_per_hz.tv64 * delta_jiffies);
+ hrtimer_start(&cpu_base->sched_timer, expires,
+ HRTIMER_MODE_ABS);
+ cpu_base->idle_sleeps++;
+ } else {
+ /* Raise the softirq if the timer wheel is behind jiffies */
+ if ((long) delta_jiffies < 0)
+ raise_softirq_irqoff(TIMER_SOFTIRQ);
+ }
+
+ local_irq_restore(flags);
+}
+
+/**
+ * hrtimer_restart_sched_tick - restart the idle tick from the idle task
+ *
+ * Restart the idle tick when the CPU is woken up from idle
+ */
+void hrtimer_restart_sched_tick(void)
+{
+ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
+ ktime_t now, delta;
+
+ if (!cpu_base->hres_active || !cpu_base->tick_stopped)
+ return;
+
+ /* Update jiffies first */
+ now = ktime_get();
+
+ local_irq_disable();
+ update_jiffies64(now);
+
+ /*
+ * Update process times would randomly account the time we slept to
+ * whatever the context of the next sched tick is. Enforce that this
+ * is accounted to idle !
+ */
+ add_preempt_count(HARDIRQ_OFFSET);
+ update_process_times(0);
+ sub_preempt_count(HARDIRQ_OFFSET);
+
+ /* Account the idle time */
+ delta = ktime_sub(now, cpu_base->idle_entrytime);
+ cpu_base->idle_sleeptime = ktime_add(cpu_base->idle_sleeptime, delta);
+
+ /*
+ * Cancel the scheduled timer and restore the tick
+ */
+ cpu_base->tick_stopped = 0;
+ hrtimer_cancel(&cpu_base->sched_timer);
+ cpu_base->sched_timer.expires = cpu_base->idle_tick;
+
+ while (1) {
+ /* Forward the time to expire in the future */
+ hrtimer_forward(&cpu_base->sched_timer, now, nsec_per_hz);
+ hrtimer_start(&cpu_base->sched_timer,
+ cpu_base->sched_timer.expires, HRTIMER_MODE_ABS);
+
+ /* Check, if the timer was already in the past */
+ if (hrtimer_active(&cpu_base->sched_timer))
+ break;
+ /* Update jiffies and reread time */
+ update_jiffies64(now);
+ now = ktime_get();
+ }
+ local_irq_enable();
+}
+
+/**
+ * show_no_hz_stats - print out the no hz statistics
+ *
+ * The no_hz statistics are appended at the end of /proc/stats
+ *
+ * I: total number of idle calls
+ * S: number of idle calls which stopped the sched tick
+ * T: Summed up sleep time in idle with sched tick stopped (unit is seconds)
+ * A: Average sleep time: T/S (unit is seconds)
+ * E: Total number of timer interrupt events
+ */
+void show_no_hz_stats(struct seq_file *p)
+{
+ unsigned long calls = 0, sleeps = 0, events = 0;
+ struct timeval tsum, tavg;
+ ktime_t totaltime = { .tv64 = 0 };
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu);
+
+ calls += base->idle_calls;
+ sleeps += base->idle_sleeps;
+ totaltime = ktime_add(totaltime, base->idle_sleeptime);
+ events += base->nr_events;
+
+#ifdef CONFIG_SMP
+ tsum = ktime_to_timeval(base->idle_sleeptime);
+ if (base->idle_sleeps) {
+ uint64_t nsec = ktime_to_ns(base->idle_sleeptime);
+
+ do_div(nsec, base->idle_sleeps);
+ tavg = ns_to_timeval(nsec);
+ } else
+ tavg.tv_sec = tavg.tv_usec = 0;
+
+ seq_printf(p, "nohz cpu%d I:%lu S:%lu T:%d.%06d A:%d.%06d E: %lu\n",
+ cpu, base->idle_calls, base->idle_sleeps,
+ (int) tsum.tv_sec, (int) tsum.tv_usec,
+ (int) tavg.tv_sec, (int) tavg.tv_usec,
+ base->nr_events);
+#endif
+ }
+
+ tsum = ktime_to_timeval(totaltime);
+ if (sleeps) {
+ uint64_t nsec = ktime_to_ns(totaltime);
+
+ do_div(nsec, sleeps);
+ tavg = ns_to_timeval(nsec);
+ } else
+ tavg.tv_sec = tavg.tv_usec = 0;
+
+ seq_printf(p, "nohz total I:%lu S:%lu T:%d.%06d A:%d.%06d E: %lu\n",
+ calls, sleeps,
+ (int) tsum.tv_sec, (int) tsum.tv_usec,
+ (int) tavg.tv_sec, (int) tavg.tv_usec,
+ events);
+}
+
+#endif
+
/*
* We rearm the timer until we get disabled by the idle code
* Called with interrupts disabled.
@@ -513,6 +728,11 @@ static enum hrtimer_restart hrtimer_sche
hrtimer_forward(timer, hrtimer_cb_get_time(timer), nsec_per_hz);
+#ifdef CONFIG_NO_HZ
+ /* Do not restart, when we are in the idle loop */
+ if (cpu_base->tick_stopped)
+ return HRTIMER_NORESTART;
+#endif
return HRTIMER_RESTART;
}
@@ -1076,6 +1296,7 @@ void hrtimer_interrupt(struct pt_regs *r
/* Store the regs for an possible sched_timer callback */
cpu_base->sched_regs = regs;
+ cpu_base->nr_events++;
retry:
now = ktime_get();
Index: linux-2.6.18-mm2/kernel/softirq.c
===================================================================
--- linux-2.6.18-mm2.orig/kernel/softirq.c 2006-10-02 00:55:51.000000000 +0200
+++ linux-2.6.18-mm2/kernel/softirq.c 2006-10-02 00:55:54.000000000 +0200
@@ -281,6 +281,11 @@ void irq_enter(void)
account_system_vtime(current);
add_preempt_count(HARDIRQ_OFFSET);
trace_hardirq_enter();
+
+#ifdef CONFIG_NO_HZ
+ if (idle_cpu(smp_processor_id()) && !in_interrupt())
+ hrtimer_update_jiffies();
+#endif
}
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
@@ -299,6 +304,12 @@ void irq_exit(void)
sub_preempt_count(IRQ_EXIT_OFFSET);
if (!in_interrupt() && local_softirq_pending())
invoke_softirq();
+
+#ifdef CONFIG_NO_HZ
+ /* Make sure that timer wheel updates are propagated */
+ if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
+ hrtimer_stop_sched_tick();
+#endif
preempt_enable_no_resched();
}
Index: linux-2.6.18-mm2/kernel/time/Kconfig
===================================================================
--- linux-2.6.18-mm2.orig/kernel/time/Kconfig 2006-10-02 00:55:54.000000000 +0200
+++ linux-2.6.18-mm2/kernel/time/Kconfig 2006-10-02 00:55:54.000000000 +0200
@@ -20,3 +20,11 @@ config HIGH_RES_RESOLUTION
800 MHz processor about 10,000 (10 microseconds) is recommended as a
finest resolution. If you don't need that sort of resolution,
larger values may generate less overhead.
+
+config NO_HZ
+ bool "Tickless System (Dynamic Ticks)"
+ depends on GENERIC_TIME && HIGH_RES_TIMERS
+ help
+ This option enables a tickless system: timer interrupts will
+ only trigger on an as-needed basis both when the system is
+ busy and when the system is idle.
Index: linux-2.6.18-mm2/kernel/timer.c
===================================================================
--- linux-2.6.18-mm2.orig/kernel/timer.c 2006-10-02 00:55:54.000000000 +0200
+++ linux-2.6.18-mm2/kernel/timer.c 2006-10-02 00:55:54.000000000 +0200
@@ -462,7 +462,7 @@ static inline void __run_timers(tvec_bas
spin_unlock_irq(&base->lock);
}
-#ifdef CONFIG_NO_IDLE_HZ
+#if defined(CONFIG_NO_IDLE_HZ) || defined(CONFIG_NO_HZ)
/*
* Find out when the next timer event is due to happen. This
* is used on S/390 to stop all activity when a cpus is idle.
--
next prev parent reply other threads:[~2006-10-01 23:10 UTC|newest]
Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-10-01 22:59 [patch 00/21] high resolution timers / dynamic ticks - V2 Thomas Gleixner
2006-10-01 22:59 ` [patch 01/21] GTOD: exponential update_wall_time Thomas Gleixner
2006-10-01 23:00 ` [patch 02/21] GTOD: persistent clock support, core Thomas Gleixner
2006-10-01 23:00 ` [patch 03/21] GTOD: persistent clock support, i386 Thomas Gleixner
2006-10-01 23:00 ` [patch 04/21] time: uninline jiffies.h Thomas Gleixner
2006-10-01 23:00 ` [patch 05/21] time: fix msecs_to_jiffies() bug Thomas Gleixner
2006-10-01 23:00 ` [patch 06/21] time: fix timeout overflow Thomas Gleixner
2006-10-01 23:00 ` [patch 07/21] cleanup: uninline irq_enter() and move it into a function Thomas Gleixner
2006-10-01 23:00 ` [patch 08/21] dynticks: extend next_timer_interrupt() to use a reference jiffie Thomas Gleixner
2006-10-01 23:00 ` [patch 09/21] hrtimers: namespace and enum cleanup Thomas Gleixner
2006-10-01 23:00 ` [patch 10/21] hrtimers: clean up locking Thomas Gleixner
2006-10-01 23:00 ` [patch 11/21] hrtimers: state tracking Thomas Gleixner
2006-10-01 23:00 ` [patch 12/21] hrtimers: clean up callback tracking Thomas Gleixner
2006-10-01 23:01 ` [patch 13/21] hrtimers: Move and add documentation Thomas Gleixner
2006-10-01 23:01 ` [patch 14/21] clockevents: core Thomas Gleixner
2006-10-01 23:01 ` [patch 15/21] clockevents: drivers for i386 Thomas Gleixner
2006-10-01 23:01 ` [patch 16/21] high-res timers: core Thomas Gleixner
2006-10-02 11:50 ` Paulo Marques
2006-10-01 23:01 ` Thomas Gleixner [this message]
2006-10-02 6:41 ` [patch] dynticks: core, NMI watchdog fix Ingo Molnar
2006-10-02 8:54 ` [patch] dynticks: core, NMI watchdog fix, #2 Ingo Molnar
2006-10-01 23:01 ` [patch 18/21] dyntick: add nohz stats to /proc/stat Thomas Gleixner
2006-10-01 23:01 ` [patch 19/21] dynticks: i386 arch code Thomas Gleixner
2006-10-01 23:01 ` [patch 20/21] high-res timers, dynticks: enable i386 support Thomas Gleixner
2006-10-01 23:01 ` [patch 21/21] debugging feature: timer stats Thomas Gleixner
2006-10-02 5:11 ` [patch 00/21] high resolution timers / dynamic ticks - V2 Valdis.Kletnieks
2006-10-02 13:02 ` Valdis.Kletnieks
2006-10-02 13:43 ` Thomas Gleixner
2006-10-02 18:25 ` Valdis.Kletnieks
2006-10-02 18:38 ` john stultz
2006-10-02 19:08 ` Valdis.Kletnieks
2006-10-02 19:23 ` john stultz
2006-10-02 18:43 ` [patch] dynticks core: Fix idle time accounting Thomas Gleixner
2006-10-02 20:17 ` Valdis.Kletnieks
2006-10-02 21:22 ` Thomas Gleixner
2006-10-02 21:35 ` Valdis.Kletnieks
2006-10-03 20:02 ` Thomas Gleixner
2006-10-03 21:05 ` Thomas Gleixner
2006-10-04 2:33 ` Valdis.Kletnieks
2006-10-04 7:56 ` Ingo Molnar
2006-10-04 9:58 ` Valdis.Kletnieks
2006-10-03 3:23 ` [patch 00/21] high resolution timers / dynamic ticks - V2 Andrew Morton
2006-10-03 4:00 ` Andrew Morton
2006-10-03 8:38 ` Thomas Gleixner
2006-10-03 8:47 ` Ingo Molnar
2006-10-03 10:35 ` [patch] clockevents: drivers for i386, fix #2 Ingo Molnar
2006-10-04 3:36 ` Andrew Morton
2006-10-04 6:46 ` Ingo Molnar
2006-10-04 7:32 ` Andrew Morton
2006-10-04 7:41 ` Ingo Molnar
2006-10-04 8:01 ` Andrew Morton
2006-10-04 7:55 ` Ingo Molnar
2006-10-04 8:15 ` Andrew Morton
2006-10-04 10:53 ` Ingo Molnar
2006-10-04 11:19 ` Thomas Gleixner
2006-10-04 16:02 ` Andrew Morton
2006-10-04 16:20 ` Thomas Gleixner
2006-10-04 16:35 ` Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20061001225724.985115000@cruncher.tec.linutronix.de \
--to=tglx@linutronix.de \
--cc=akpm@osdl.org \
--cc=arjan@infradead.org \
--cc=davej@redhat.com \
--cc=dwmw2@infradead.org \
--cc=jg@laptop.org \
--cc=johnstul@us.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox