From: Ingo Molnar <mingo@kernel.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org,
Thomas Gleixner <tglx@linutronix.de>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Andrew Morton <akpm@linux-foundation.org>
Subject: [GIT PULL] timer fix
Date: Sat, 26 Aug 2017 09:17:26 +0200 [thread overview]
Message-ID: <20170826071726.cbervdmp5lmedgm3@gmail.com> (raw)
Linus,
Please pull the latest timers-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers-urgent-for-linus
# HEAD: 2fe59f507a65dbd734b990a11ebc7488f6f87a24 timers: Fix excessive granularity of new timers after a nohz idle
Fix a timer granularity handling race+bug, which would manifest itself by
spuriously increasing timeouts of some timers (from 1 jiffy to ~500 jiffies
in the worst case measured) in certain nohz states.
Thanks,
Ingo
------------------>
Nicholas Piggin (1):
timers: Fix excessive granularity of new timers after a nohz idle
kernel/time/timer.c | 50 +++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 41 insertions(+), 9 deletions(-)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 8f5d1bf18854..f2674a056c26 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -203,6 +203,7 @@ struct timer_base {
bool migration_enabled;
bool nohz_active;
bool is_idle;
+ bool must_forward_clk;
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
struct hlist_head vectors[WHEEL_SIZE];
} ____cacheline_aligned;
@@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, unsigned tflags)
static inline void forward_timer_base(struct timer_base *base)
{
- unsigned long jnow = READ_ONCE(jiffies);
+ unsigned long jnow;
/*
- * We only forward the base when it's idle and we have a delta between
- * base clock and jiffies.
+ * We only forward the base when we are idle or have just come out of
+ * idle (must_forward_clk logic), and have a delta between base clock
+ * and jiffies. In the common case, run_timers will take care of it.
*/
- if (!base->is_idle || (long) (jnow - base->clk) < 2)
+ if (likely(!base->must_forward_clk))
+ return;
+
+ jnow = READ_ONCE(jiffies);
+ base->must_forward_clk = base->is_idle;
+ if ((long)(jnow - base->clk) < 2)
return;
/*
@@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
* same array bucket then just return:
*/
if (timer_pending(timer)) {
+ /*
+ * The downside of this optimization is that it can result in
+ * larger granularity than you would get from adding a new
+ * timer with this expiry.
+ */
if (timer->expires == expires)
return 1;
@@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
* dequeue/enqueue dance.
*/
base = lock_timer_base(timer, &flags);
+ forward_timer_base(base);
clk = base->clk;
idx = calc_wheel_index(expires, clk);
@@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
}
} else {
base = lock_timer_base(timer, &flags);
+ forward_timer_base(base);
}
ret = detach_if_pending(timer, base, false);
@@ -991,12 +1005,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
raw_spin_lock(&base->lock);
WRITE_ONCE(timer->flags,
(timer->flags & ~TIMER_BASEMASK) | base->cpu);
+ forward_timer_base(base);
}
}
- /* Try to forward a stale timer base clock */
- forward_timer_base(base);
-
timer->expires = expires;
/*
* If 'idx' was calculated above and the base time did not advance
@@ -1112,6 +1124,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
WRITE_ONCE(timer->flags,
(timer->flags & ~TIMER_BASEMASK) | cpu);
}
+ forward_timer_base(base);
debug_activate(timer, timer->expires);
internal_add_timer(base, timer);
@@ -1497,10 +1510,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
if (!is_max_delta)
expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
/*
- * If we expect to sleep more than a tick, mark the base idle:
+ * If we expect to sleep more than a tick, mark the base idle.
+ * Also the tick is stopped so any added timer must forward
+ * the base clk itself to keep granularity small. This idle
+ * logic is only maintained for the BASE_STD base, deferrable
+ * timers may still see large granularity skew (by design).
*/
- if ((expires - basem) > TICK_NSEC)
+ if ((expires - basem) > TICK_NSEC) {
+ base->must_forward_clk = true;
base->is_idle = true;
+ }
}
raw_spin_unlock(&base->lock);
@@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
{
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
+ /*
+ * must_forward_clk must be cleared before running timers so that any
+ * timer functions that call mod_timer will not try to forward the
+ * base. idle trcking / clock forwarding logic is only used with
+ * BASE_STD timers.
+ *
+ * The deferrable base does not do idle tracking at all, so we do
+ * not forward it. This can result in very large variations in
+ * granularity for deferrable timers, but they can be deferred for
+ * long periods due to idle.
+ */
+ base->must_forward_clk = false;
+
__run_timers(base);
if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
next reply other threads:[~2017-08-26 7:17 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-08-26 7:17 Ingo Molnar [this message]
-- strict thread matches above, loose matches on Subject: below --
2025-04-18 20:34 [GIT PULL] timer fix Ingo Molnar
2025-04-18 21:15 ` pr-tracker-bot
2024-06-15 8:05 Ingo Molnar
2024-06-15 18:38 ` pr-tracker-bot
2024-05-10 11:12 Ingo Molnar
2024-05-10 17:29 ` pr-tracker-bot
2020-06-28 18:39 Ingo Molnar
2020-06-28 22:05 ` pr-tracker-bot
2020-04-25 10:16 Ingo Molnar
2020-04-25 19:30 ` pr-tracker-bot
2019-11-16 21:38 Ingo Molnar
2019-11-17 0:35 ` pr-tracker-bot
2019-10-02 22:06 Ingo Molnar
2019-10-02 23:00 ` pr-tracker-bot
2019-09-26 20:18 Ingo Molnar
2019-09-26 23:00 ` pr-tracker-bot
2019-04-12 13:09 Ingo Molnar
2019-04-13 4:05 ` pr-tracker-bot
2018-12-21 12:34 Ingo Molnar
2018-12-21 19:30 ` pr-tracker-bot
2018-12-23 19:29 ` Heiko Carstens
2019-01-17 9:51 ` Ingo Molnar
2019-01-17 15:58 ` Heiko Carstens
2019-01-17 16:57 ` Thomas Gleixner
2018-03-25 9:00 Ingo Molnar
2017-09-24 11:25 Ingo Molnar
2017-07-21 10:21 Ingo Molnar
2017-05-12 7:35 Ingo Molnar
2017-01-18 9:37 Ingo Molnar
2016-12-23 22:53 Ingo Molnar
2016-10-18 11:18 Ingo Molnar
2016-07-13 12:58 Ingo Molnar
2016-04-23 11:34 Ingo Molnar
2015-08-14 7:13 Ingo Molnar
2015-07-18 3:06 Ingo Molnar
2015-02-06 18:38 Ingo Molnar
2014-03-29 18:44 Ingo Molnar
2014-01-15 18:27 Ingo Molnar
2013-10-26 12:27 Ingo Molnar
2013-09-18 16:22 Ingo Molnar
2011-10-17 1:39 Linux 3.1-rc9 Linus Torvalds
2011-10-17 10:34 ` Peter Zijlstra
2011-10-17 14:57 ` Linus Torvalds
2011-10-17 17:54 ` Peter Zijlstra
2011-10-17 18:31 ` Linus Torvalds
2011-10-17 19:23 ` Peter Zijlstra
2011-10-17 21:00 ` Thomas Gleixner
2011-10-18 8:39 ` Thomas Gleixner
2011-10-18 9:05 ` Peter Zijlstra
2011-10-18 14:59 ` Linus Torvalds
2011-10-18 18:14 ` [GIT PULL] timer fix Ingo Molnar
2011-04-29 18:11 Ingo Molnar
2011-02-28 17:39 Ingo Molnar
2011-02-15 17:06 Ingo Molnar
2010-01-31 17:26 Ingo Molnar
2009-10-02 12:38 Ingo Molnar
2009-09-26 12:27 Ingo Molnar
2009-08-09 16:09 Ingo Molnar
2009-08-04 19:04 Ingo Molnar
2009-06-20 16:55 Ingo Molnar
2009-02-17 16:38 [git pull] " Ingo Molnar
2009-02-04 19:25 Ingo Molnar
2009-02-04 22:11 ` Linus Torvalds
2009-02-04 22:16 ` Linus Torvalds
2009-02-04 22:25 ` Ingo Molnar
2009-02-04 22:58 ` Ingo Molnar
2009-02-04 23:13 ` H. Peter Anvin
2009-02-05 0:04 ` Ingo Molnar
2009-02-05 7:51 ` Kirill Korotaev
2009-02-05 9:58 ` Pavel Emelyanov
2009-02-05 14:30 ` Ingo Molnar
2009-02-05 16:04 ` Ray Lee
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170826071726.cbervdmp5lmedgm3@gmail.com \
--to=mingo@kernel.org \
--cc=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.