linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@kernel.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org,
	Thomas Gleixner <tglx@linutronix.de>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: [GIT PULL] timer fix
Date: Sat, 26 Aug 2017 09:17:26 +0200	[thread overview]
Message-ID: <20170826071726.cbervdmp5lmedgm3@gmail.com> (raw)

Linus,

Please pull the latest timers-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers-urgent-for-linus

   # HEAD: 2fe59f507a65dbd734b990a11ebc7488f6f87a24 timers: Fix excessive granularity of new timers after a nohz idle

Fix a timer granularity handling race+bug, which would manifest itself by 
spuriously increasing timeouts of some timers (from 1 jiffy to ~500 jiffies
in the worst case measured) in certain nohz states.

 Thanks,

	Ingo

------------------>
Nicholas Piggin (1):
      timers: Fix excessive granularity of new timers after a nohz idle


 kernel/time/timer.c | 50 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 9 deletions(-)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 8f5d1bf18854..f2674a056c26 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -203,6 +203,7 @@ struct timer_base {
 	bool			migration_enabled;
 	bool			nohz_active;
 	bool			is_idle;
+	bool			must_forward_clk;
 	DECLARE_BITMAP(pending_map, WHEEL_SIZE);
 	struct hlist_head	vectors[WHEEL_SIZE];
 } ____cacheline_aligned;
@@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, unsigned tflags)
 
 static inline void forward_timer_base(struct timer_base *base)
 {
-	unsigned long jnow = READ_ONCE(jiffies);
+	unsigned long jnow;
 
 	/*
-	 * We only forward the base when it's idle and we have a delta between
-	 * base clock and jiffies.
+	 * We only forward the base when we are idle or have just come out of
+	 * idle (must_forward_clk logic), and have a delta between base clock
+	 * and jiffies. In the common case, run_timers will take care of it.
 	 */
-	if (!base->is_idle || (long) (jnow - base->clk) < 2)
+	if (likely(!base->must_forward_clk))
+		return;
+
+	jnow = READ_ONCE(jiffies);
+	base->must_forward_clk = base->is_idle;
+	if ((long)(jnow - base->clk) < 2)
 		return;
 
 	/*
@@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 	 * same array bucket then just return:
 	 */
 	if (timer_pending(timer)) {
+		/*
+		 * The downside of this optimization is that it can result in
+		 * larger granularity than you would get from adding a new
+		 * timer with this expiry.
+		 */
 		if (timer->expires == expires)
 			return 1;
 
@@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 		 * dequeue/enqueue dance.
 		 */
 		base = lock_timer_base(timer, &flags);
+		forward_timer_base(base);
 
 		clk = base->clk;
 		idx = calc_wheel_index(expires, clk);
@@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 		}
 	} else {
 		base = lock_timer_base(timer, &flags);
+		forward_timer_base(base);
 	}
 
 	ret = detach_if_pending(timer, base, false);
@@ -991,12 +1005,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 			raw_spin_lock(&base->lock);
 			WRITE_ONCE(timer->flags,
 				   (timer->flags & ~TIMER_BASEMASK) | base->cpu);
+			forward_timer_base(base);
 		}
 	}
 
-	/* Try to forward a stale timer base clock */
-	forward_timer_base(base);
-
 	timer->expires = expires;
 	/*
 	 * If 'idx' was calculated above and the base time did not advance
@@ -1112,6 +1124,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
 		WRITE_ONCE(timer->flags,
 			   (timer->flags & ~TIMER_BASEMASK) | cpu);
 	}
+	forward_timer_base(base);
 
 	debug_activate(timer, timer->expires);
 	internal_add_timer(base, timer);
@@ -1497,10 +1510,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 		if (!is_max_delta)
 			expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
 		/*
-		 * If we expect to sleep more than a tick, mark the base idle:
+		 * If we expect to sleep more than a tick, mark the base idle.
+		 * Also the tick is stopped so any added timer must forward
+		 * the base clk itself to keep granularity small. This idle
+		 * logic is only maintained for the BASE_STD base, deferrable
+		 * timers may still see large granularity skew (by design).
 		 */
-		if ((expires - basem) > TICK_NSEC)
+		if ((expires - basem) > TICK_NSEC) {
+			base->must_forward_clk = true;
 			base->is_idle = true;
+		}
 	}
 	raw_spin_unlock(&base->lock);
 
@@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
 {
 	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
 
+	/*
+	 * must_forward_clk must be cleared before running timers so that any
+	 * timer functions that call mod_timer will not try to forward the
+	 * base. Idle tracking / clock forwarding logic is only used with
+	 * BASE_STD timers.
+	 *
+	 * The deferrable base does not do idle tracking at all, so we do
+	 * not forward it. This can result in very large variations in
+	 * granularity for deferrable timers, but they can be deferred for
+	 * long periods due to idle.
+	 */
+	base->must_forward_clk = false;
+
 	__run_timers(base);
 	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
 		__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));

             reply	other threads:[~2017-08-26  7:17 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-08-26  7:17 Ingo Molnar [this message]
  -- strict thread matches above, loose matches on Subject: below --
2025-04-18 20:34 [GIT PULL] timer fix Ingo Molnar
2025-04-18 21:15 ` pr-tracker-bot
2024-06-15  8:05 Ingo Molnar
2024-06-15 18:38 ` pr-tracker-bot
2024-05-10 11:12 Ingo Molnar
2024-05-10 17:29 ` pr-tracker-bot
2020-06-28 18:39 Ingo Molnar
2020-06-28 22:05 ` pr-tracker-bot
2020-04-25 10:16 Ingo Molnar
2020-04-25 19:30 ` pr-tracker-bot
2019-11-16 21:38 Ingo Molnar
2019-11-17  0:35 ` pr-tracker-bot
2019-10-02 22:06 Ingo Molnar
2019-10-02 23:00 ` pr-tracker-bot
2019-09-26 20:18 Ingo Molnar
2019-09-26 23:00 ` pr-tracker-bot
2019-04-12 13:09 Ingo Molnar
2019-04-13  4:05 ` pr-tracker-bot
2018-12-21 12:34 Ingo Molnar
2018-12-21 19:30 ` pr-tracker-bot
2018-12-23 19:29 ` Heiko Carstens
2019-01-17  9:51   ` Ingo Molnar
2019-01-17 15:58     ` Heiko Carstens
2019-01-17 16:57       ` Thomas Gleixner
2018-03-25  9:00 Ingo Molnar
2017-09-24 11:25 Ingo Molnar
2017-07-21 10:21 Ingo Molnar
2017-05-12  7:35 Ingo Molnar
2017-01-18  9:37 Ingo Molnar
2016-12-23 22:53 Ingo Molnar
2016-10-18 11:18 Ingo Molnar
2016-07-13 12:58 Ingo Molnar
2016-04-23 11:34 Ingo Molnar
2015-08-14  7:13 Ingo Molnar
2015-07-18  3:06 Ingo Molnar
2015-02-06 18:38 Ingo Molnar
2014-03-29 18:44 Ingo Molnar
2014-01-15 18:27 Ingo Molnar
2013-10-26 12:27 Ingo Molnar
2013-09-18 16:22 Ingo Molnar
2011-10-17  1:39 Linux 3.1-rc9 Linus Torvalds
2011-10-17 10:34 ` Peter Zijlstra
2011-10-17 14:57   ` Linus Torvalds
2011-10-17 17:54     ` Peter Zijlstra
2011-10-17 18:31       ` Linus Torvalds
2011-10-17 19:23         ` Peter Zijlstra
2011-10-17 21:00           ` Thomas Gleixner
2011-10-18  8:39             ` Thomas Gleixner
2011-10-18  9:05               ` Peter Zijlstra
2011-10-18 14:59                 ` Linus Torvalds
2011-10-18 18:14                   ` [GIT PULL] timer fix Ingo Molnar
2011-04-29 18:11 Ingo Molnar
2011-02-28 17:39 Ingo Molnar
2011-02-15 17:06 Ingo Molnar
2010-01-31 17:26 Ingo Molnar
2009-10-02 12:38 Ingo Molnar
2009-09-26 12:27 Ingo Molnar
2009-08-09 16:09 Ingo Molnar
2009-08-04 19:04 Ingo Molnar
2009-06-20 16:55 Ingo Molnar
2009-02-17 16:38 [git pull] " Ingo Molnar
2009-02-04 19:25 Ingo Molnar
2009-02-04 22:11 ` Linus Torvalds
2009-02-04 22:16   ` Linus Torvalds
2009-02-04 22:25   ` Ingo Molnar
2009-02-04 22:58   ` Ingo Molnar
2009-02-04 23:13     ` H. Peter Anvin
2009-02-05  0:04       ` Ingo Molnar
2009-02-05  7:51     ` Kirill Korotaev
2009-02-05  9:58       ` Pavel Emelyanov
2009-02-05 14:30         ` Ingo Molnar
2009-02-05 16:04         ` Ray Lee

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170826071726.cbervdmp5lmedgm3@gmail.com \
    --to=mingo@kernel.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).