From: Ingo Molnar <mingo@kernel.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org,
Thomas Gleixner <tglx@linutronix.de>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Andrew Morton <akpm@linux-foundation.org>
Subject: [GIT PULL] timer fix
Date: Sat, 26 Aug 2017 09:17:26 +0200 [thread overview]
Message-ID: <20170826071726.cbervdmp5lmedgm3@gmail.com> (raw)
Linus,
Please pull the latest timers-urgent-for-linus git tree from:
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers-urgent-for-linus
# HEAD: 2fe59f507a65dbd734b990a11ebc7488f6f87a24 timers: Fix excessive granularity of new timers after a nohz idle
Fix a timer granularity handling race+bug, which would manifest itself by
spuriously increasing timeouts of some timers (from 1 jiffy to ~500 jiffies
in the worst case measured) in certain nohz states.
Thanks,
Ingo
------------------>
Nicholas Piggin (1):
timers: Fix excessive granularity of new timers after a nohz idle
kernel/time/timer.c | 50 +++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 41 insertions(+), 9 deletions(-)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 8f5d1bf18854..f2674a056c26 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -203,6 +203,7 @@ struct timer_base {
bool migration_enabled;
bool nohz_active;
bool is_idle;
+ bool must_forward_clk;
DECLARE_BITMAP(pending_map, WHEEL_SIZE);
struct hlist_head vectors[WHEEL_SIZE];
} ____cacheline_aligned;
@@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, unsigned tflags)
static inline void forward_timer_base(struct timer_base *base)
{
- unsigned long jnow = READ_ONCE(jiffies);
+ unsigned long jnow;
/*
- * We only forward the base when it's idle and we have a delta between
- * base clock and jiffies.
+ * We only forward the base when we are idle or have just come out of
+ * idle (must_forward_clk logic), and have a delta between base clock
+ * and jiffies. In the common case, run_timers will take care of it.
*/
- if (!base->is_idle || (long) (jnow - base->clk) < 2)
+ if (likely(!base->must_forward_clk))
+ return;
+
+ jnow = READ_ONCE(jiffies);
+ base->must_forward_clk = base->is_idle;
+ if ((long)(jnow - base->clk) < 2)
return;
/*
@@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
* same array bucket then just return:
*/
if (timer_pending(timer)) {
+ /*
+ * The downside of this optimization is that it can result in
+ * larger granularity than you would get from adding a new
+ * timer with this expiry.
+ */
if (timer->expires == expires)
return 1;
@@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
* dequeue/enqueue dance.
*/
base = lock_timer_base(timer, &flags);
+ forward_timer_base(base);
clk = base->clk;
idx = calc_wheel_index(expires, clk);
@@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
}
} else {
base = lock_timer_base(timer, &flags);
+ forward_timer_base(base);
}
ret = detach_if_pending(timer, base, false);
@@ -991,12 +1005,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
raw_spin_lock(&base->lock);
WRITE_ONCE(timer->flags,
(timer->flags & ~TIMER_BASEMASK) | base->cpu);
+ forward_timer_base(base);
}
}
- /* Try to forward a stale timer base clock */
- forward_timer_base(base);
-
timer->expires = expires;
/*
* If 'idx' was calculated above and the base time did not advance
@@ -1112,6 +1124,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
WRITE_ONCE(timer->flags,
(timer->flags & ~TIMER_BASEMASK) | cpu);
}
+ forward_timer_base(base);
debug_activate(timer, timer->expires);
internal_add_timer(base, timer);
@@ -1497,10 +1510,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
if (!is_max_delta)
expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
/*
- * If we expect to sleep more than a tick, mark the base idle:
+ * If we expect to sleep more than a tick, mark the base idle.
+ * Also the tick is stopped so any added timer must forward
+ * the base clk itself to keep granularity small. This idle
+ * logic is only maintained for the BASE_STD base, deferrable
+ * timers may still see large granularity skew (by design).
*/
- if ((expires - basem) > TICK_NSEC)
+ if ((expires - basem) > TICK_NSEC) {
+ base->must_forward_clk = true;
base->is_idle = true;
+ }
}
raw_spin_unlock(&base->lock);
@@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
{
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
+ /*
+ * must_forward_clk must be cleared before running timers so that any
+ * timer functions that call mod_timer will not try to forward the
+ * base. idle trcking / clock forwarding logic is only used with
+ * BASE_STD timers.
+ *
+ * The deferrable base does not do idle tracking at all, so we do
+ * not forward it. This can result in very large variations in
+ * granularity for deferrable timers, but they can be deferred for
+ * long periods due to idle.
+ */
+ base->must_forward_clk = false;
+
__run_timers(base);
if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
next reply other threads:[~2017-08-26 7:17 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-08-26 7:17 Ingo Molnar [this message]
-- strict thread matches above, loose matches on Subject: below --
2025-04-18 20:34 [GIT PULL] timer fix Ingo Molnar
2025-04-18 21:15 ` pr-tracker-bot
2024-06-15 8:05 Ingo Molnar
2024-06-15 18:38 ` pr-tracker-bot
2024-05-10 11:12 Ingo Molnar
2024-05-10 17:29 ` pr-tracker-bot
2020-06-28 18:39 Ingo Molnar
2020-06-28 22:05 ` pr-tracker-bot
2020-04-25 10:16 Ingo Molnar
2020-04-25 19:30 ` pr-tracker-bot
2019-11-16 21:38 Ingo Molnar
2019-11-17 0:35 ` pr-tracker-bot
2019-10-02 22:06 Ingo Molnar
2019-10-02 23:00 ` pr-tracker-bot
2019-09-26 20:18 Ingo Molnar
2019-09-26 23:00 ` pr-tracker-bot
2019-04-12 13:09 Ingo Molnar
2019-04-13 4:05 ` pr-tracker-bot
2018-12-21 12:34 Ingo Molnar
2018-12-21 19:30 ` pr-tracker-bot
2018-12-23 19:29 ` Heiko Carstens
2019-01-17 9:51 ` Ingo Molnar
2019-01-17 15:58 ` Heiko Carstens
2019-01-17 16:57 ` Thomas Gleixner
2018-03-25 9:00 Ingo Molnar
2017-09-24 11:25 Ingo Molnar
2017-07-21 10:21 Ingo Molnar
2017-05-12 7:35 Ingo Molnar
2017-01-18 9:37 Ingo Molnar
2016-12-23 22:53 Ingo Molnar
2016-10-18 11:18 Ingo Molnar
2016-07-13 12:58 Ingo Molnar
2016-04-23 11:34 Ingo Molnar
2015-08-14 7:13 Ingo Molnar
2015-07-18 3:06 Ingo Molnar
2015-02-06 18:38 Ingo Molnar
2014-03-29 18:44 Ingo Molnar
2014-01-15 18:27 Ingo Molnar
2013-10-26 12:27 Ingo Molnar
2013-09-18 16:22 Ingo Molnar
2011-10-17 1:39 Linux 3.1-rc9 Linus Torvalds
2011-10-17 10:34 ` Peter Zijlstra
2011-10-17 14:57 ` Linus Torvalds
2011-10-17 17:54 ` Peter Zijlstra
2011-10-17 18:31 ` Linus Torvalds
2011-10-17 19:23 ` Peter Zijlstra
2011-10-17 21:00 ` Thomas Gleixner
2011-10-18 8:39 ` Thomas Gleixner
2011-10-18 9:05 ` Peter Zijlstra
2011-10-18 14:59 ` Linus Torvalds
2011-10-18 18:14 ` [GIT PULL] timer fix Ingo Molnar
2011-04-29 18:11 Ingo Molnar
2011-02-28 17:39 Ingo Molnar
2011-02-15 17:06 Ingo Molnar
2010-01-31 17:26 Ingo Molnar
2009-10-02 12:38 Ingo Molnar
2009-09-26 12:27 Ingo Molnar
2009-08-09 16:09 Ingo Molnar
2009-08-04 19:04 Ingo Molnar
2009-06-20 16:55 Ingo Molnar
2009-02-17 16:38 [git pull] " Ingo Molnar
2009-02-04 19:25 Ingo Molnar
2009-02-04 22:11 ` Linus Torvalds
2009-02-04 22:16 ` Linus Torvalds
2009-02-04 22:25 ` Ingo Molnar
2009-02-04 22:58 ` Ingo Molnar
2009-02-04 23:13 ` H. Peter Anvin
2009-02-05 0:04 ` Ingo Molnar
2009-02-05 7:51 ` Kirill Korotaev
2009-02-05 9:58 ` Pavel Emelyanov
2009-02-05 14:30 ` Ingo Molnar
2009-02-05 16:04 ` Ray Lee
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170826071726.cbervdmp5lmedgm3@gmail.com \
--to=mingo@kernel.org \
--cc=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).