public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [patch 0/4] timers: Fix get_next_timer_interrupt() proper
@ 2012-05-25 22:08 Thomas Gleixner
  2012-05-25 22:08 ` [patch 2/4] timers: Consolidate base->next_timer update Thomas Gleixner
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Thomas Gleixner @ 2012-05-25 22:08 UTC (permalink / raw)
  To: LKML; +Cc: Peter Zijlstra, Gilad Ben-Yossef, Frederic Weisbecker

This series was inspired by the patch Gilad sent to fix a shortcoming
in get_next_timer_interrupt().

Instead of working around the problem I think it's better to avoid it
completely.

The fix is on top of a consolidation of the mindlessly copied code in
various timer API functions. Even with the added functionality that
makes the obj code footprint shrink

  text	   data	    bss	    dec	    hex	filename
  18589	   3682	   8257	  30528	   7740	../build/kernel/timer.o
  17997	   3682	   8257	  29936	   74f0	../build/kernel/timer.o

Thanks,

	tglx
----
 timer.c |  110 +++++++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 61 insertions(+), 49 deletions(-)


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [patch 2/4] timers: Consolidate base->next_timer update
  2012-05-25 22:08 [patch 0/4] timers: Fix get_next_timer_interrupt() proper Thomas Gleixner
@ 2012-05-25 22:08 ` Thomas Gleixner
  2012-05-29  6:34   ` Nikunj A Dadhania
  2012-06-06 11:59   ` [tip:timers/core] " tip-bot for Thomas Gleixner
  2012-05-25 22:08 ` [patch 1/4] timers: Create detach_if_pending() and use it Thomas Gleixner
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 12+ messages in thread
From: Thomas Gleixner @ 2012-05-25 22:08 UTC (permalink / raw)
  To: LKML; +Cc: Peter Zijlstra, Gilad Ben-Yossef, Frederic Weisbecker

[-- Attachment #1: timers-consolidate-base-next-timer-update.patch --]
[-- Type: text/plain, Size: 2825 bytes --]

Another bunch of mindlessly copied code. All callers of
internal_add_timer() except the recascading code updates
base->next_timer.

Move this into internal_add_timer() and let the cascading code call
__internal_add_timer().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/timer.c |   26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

Index: tip/kernel/timer.c
===================================================================
--- tip.orig/kernel/timer.c
+++ tip/kernel/timer.c
@@ -330,7 +330,8 @@ void set_timer_slack(struct timer_list *
 }
 EXPORT_SYMBOL_GPL(set_timer_slack);
 
-static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
+static void
+__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 {
 	unsigned long expires = timer->expires;
 	unsigned long idx = expires - base->timer_jiffies;
@@ -372,6 +373,17 @@ static void internal_add_timer(struct tv
 	list_add_tail(&timer->entry, vec);
 }
 
+static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
+{
+	__internal_add_timer(base, timer);
+	/*
+	 * Update base->next_timer if this is the earliest one.
+	 */
+	if (time_before(timer->expires, base->next_timer) &&
+	    !tbase_get_deferrable(timer->base))
+		base->next_timer = timer->expires;
+}
+
 #ifdef CONFIG_TIMER_STATS
 void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
 {
@@ -757,9 +769,6 @@ __mod_timer(struct timer_list *timer, un
 	}
 
 	timer->expires = expires;
-	if (time_before(timer->expires, base->next_timer) &&
-	    !tbase_get_deferrable(timer->base))
-		base->next_timer = timer->expires;
 	internal_add_timer(base, timer);
 
 out_unlock:
@@ -925,9 +934,6 @@ void add_timer_on(struct timer_list *tim
 	spin_lock_irqsave(&base->lock, flags);
 	timer_set_base(timer, base);
 	debug_activate(timer, timer->expires);
-	if (time_before(timer->expires, base->next_timer) &&
-	    !tbase_get_deferrable(timer->base))
-		base->next_timer = timer->expires;
 	internal_add_timer(base, timer);
 	/*
 	 * Check whether the other CPU is idle and needs to be
@@ -1079,7 +1085,8 @@ static int cascade(struct tvec_base *bas
 	 */
 	list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
 		BUG_ON(tbase_get_base(timer->base) != base);
-		internal_add_timer(base, timer);
+		/* No accounting, while moving them */
+		__internal_add_timer(base, timer);
 	}
 
 	return index;
@@ -1706,9 +1713,6 @@ static void migrate_timer_list(struct tv
 		timer = list_first_entry(head, struct timer_list, entry);
 		detach_timer(timer, false);
 		timer_set_base(timer, new_base);
-		if (time_before(timer->expires, new_base->next_timer) &&
-		    !tbase_get_deferrable(timer->base))
-			new_base->next_timer = timer->expires;
 		internal_add_timer(new_base, timer);
 	}
 }



^ permalink raw reply	[flat|nested] 12+ messages in thread

* [patch 1/4] timers: Create detach_if_pending() and use it
  2012-05-25 22:08 [patch 0/4] timers: Fix get_next_timer_interrupt() proper Thomas Gleixner
  2012-05-25 22:08 ` [patch 2/4] timers: Consolidate base->next_timer update Thomas Gleixner
@ 2012-05-25 22:08 ` Thomas Gleixner
  2012-06-06 11:58   ` [tip:timers/core] " tip-bot for Thomas Gleixner
  2012-05-25 22:08 ` [patch 3/4] timers: Add accounting of non deferrable timers Thomas Gleixner
  2012-05-25 22:08 ` [patch 4/4] timers: Improve get_next_timer_interrupt() Thomas Gleixner
  3 siblings, 1 reply; 12+ messages in thread
From: Thomas Gleixner @ 2012-05-25 22:08 UTC (permalink / raw)
  To: LKML; +Cc: Peter Zijlstra, Gilad Ben-Yossef, Frederic Weisbecker

[-- Attachment #1: timers-create-detach-if-pending.patch --]
[-- Type: text/plain, Size: 3667 bytes --]

Most callers of detach_timer() have the same pattern around
them. Check whether the timer is pending and eventually updating
base->next_timer.

Create detach_if_pending() and replace the duplicated code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/timer.c |   56 +++++++++++++++++++++++---------------------------------
 1 file changed, 23 insertions(+), 33 deletions(-)

Index: tip/kernel/timer.c
===================================================================
--- tip.orig/kernel/timer.c
+++ tip/kernel/timer.c
@@ -654,8 +654,7 @@ void init_timer_deferrable_key(struct ti
 }
 EXPORT_SYMBOL(init_timer_deferrable_key);
 
-static inline void detach_timer(struct timer_list *timer,
-				int clear_pending)
+static inline void detach_timer(struct timer_list *timer, bool clear_pending)
 {
 	struct list_head *entry = &timer->entry;
 
@@ -667,6 +666,19 @@ static inline void detach_timer(struct t
 	entry->prev = LIST_POISON2;
 }
 
+static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
+			     bool clear_pending)
+{
+	if (!timer_pending(timer))
+		return 0;
+
+	detach_timer(timer, clear_pending);
+	if (timer->expires == base->next_timer &&
+	    !tbase_get_deferrable(timer->base))
+		base->next_timer = base->timer_jiffies;
+	return 1;
+}
+
 /*
  * We are using hashed locking: holding per_cpu(tvec_bases).lock
  * means that all timers which are tied to this base via timer->base are
@@ -712,16 +724,9 @@ __mod_timer(struct timer_list *timer, un
 
 	base = lock_timer_base(timer, &flags);
 
-	if (timer_pending(timer)) {
-		detach_timer(timer, 0);
-		if (timer->expires == base->next_timer &&
-		    !tbase_get_deferrable(timer->base))
-			base->next_timer = base->timer_jiffies;
-		ret = 1;
-	} else {
-		if (pending_only)
-			goto out_unlock;
-	}
+	ret = detach_if_pending(timer, base, false);
+	if (!ret && pending_only)
+		goto out_unlock;
 
 	debug_activate(timer, expires);
 
@@ -959,13 +964,7 @@ int del_timer(struct timer_list *timer)
 	timer_stats_timer_clear_start_info(timer);
 	if (timer_pending(timer)) {
 		base = lock_timer_base(timer, &flags);
-		if (timer_pending(timer)) {
-			detach_timer(timer, 1);
-			if (timer->expires == base->next_timer &&
-			    !tbase_get_deferrable(timer->base))
-				base->next_timer = base->timer_jiffies;
-			ret = 1;
-		}
+		ret = detach_if_pending(timer, base, true);
 		spin_unlock_irqrestore(&base->lock, flags);
 	}
 
@@ -990,19 +989,10 @@ int try_to_del_timer_sync(struct timer_l
 
 	base = lock_timer_base(timer, &flags);
 
-	if (base->running_timer == timer)
-		goto out;
-
-	timer_stats_timer_clear_start_info(timer);
-	ret = 0;
-	if (timer_pending(timer)) {
-		detach_timer(timer, 1);
-		if (timer->expires == base->next_timer &&
-		    !tbase_get_deferrable(timer->base))
-			base->next_timer = base->timer_jiffies;
-		ret = 1;
+	if (base->running_timer != timer) {
+		timer_stats_timer_clear_start_info(timer);
+		ret = detach_if_pending(timer, base, true);
 	}
-out:
 	spin_unlock_irqrestore(&base->lock, flags);
 
 	return ret;
@@ -1178,7 +1168,7 @@ static inline void __run_timers(struct t
 			timer_stats_account_timer(timer);
 
 			base->running_timer = timer;
-			detach_timer(timer, 1);
+			detach_timer(timer, true);
 
 			spin_unlock_irq(&base->lock);
 			call_timer_fn(timer, fn, data);
@@ -1714,7 +1704,7 @@ static void migrate_timer_list(struct tv
 
 	while (!list_empty(head)) {
 		timer = list_first_entry(head, struct timer_list, entry);
-		detach_timer(timer, 0);
+		detach_timer(timer, false);
 		timer_set_base(timer, new_base);
 		if (time_before(timer->expires, new_base->next_timer) &&
 		    !tbase_get_deferrable(timer->base))



^ permalink raw reply	[flat|nested] 12+ messages in thread

* [patch 3/4] timers: Add accounting of non deferrable timers
  2012-05-25 22:08 [patch 0/4] timers: Fix get_next_timer_interrupt() proper Thomas Gleixner
  2012-05-25 22:08 ` [patch 2/4] timers: Consolidate base->next_timer update Thomas Gleixner
  2012-05-25 22:08 ` [patch 1/4] timers: Create detach_if_pending() and use it Thomas Gleixner
@ 2012-05-25 22:08 ` Thomas Gleixner
  2012-06-06 12:00   ` [tip:timers/core] " tip-bot for Thomas Gleixner
  2012-05-25 22:08 ` [patch 4/4] timers: Improve get_next_timer_interrupt() Thomas Gleixner
  3 siblings, 1 reply; 12+ messages in thread
From: Thomas Gleixner @ 2012-05-25 22:08 UTC (permalink / raw)
  To: LKML; +Cc: Peter Zijlstra, Gilad Ben-Yossef, Frederic Weisbecker

[-- Attachment #1: timers-add-accounting.patch --]
[-- Type: text/plain, Size: 3495 bytes --]

The code in get_next_timer_interrupt() is suboptimal as it has to run
through the cascade to find the next expiring timer. On a completely
idle core we should only do that when there is an active timer
enqueued and base->next_timer does not give us an fast answer.

Add accounting of the active timers to the now consolidated
attach/detach code. I deliberately avoided sanity checks because the
code is fully symetric and any fiddling with timers w/o using the API
functions will lead to cute explosions anyway. ulong is big enough
even on 32bit and if we really run into the situation to have more
than 1<<32 timers enqueued there, then we are definitely not in a
state to go idle and run through that code.

This allows us to fix another shortcoming of get_next_timer_interrupt().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/timer.c |   31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

Index: tip/kernel/timer.c
===================================================================
--- tip.orig/kernel/timer.c
+++ tip/kernel/timer.c
@@ -77,6 +77,7 @@ struct tvec_base {
 	struct timer_list *running_timer;
 	unsigned long timer_jiffies;
 	unsigned long next_timer;
+	unsigned long active_timers;
 	struct tvec_root tv1;
 	struct tvec tv2;
 	struct tvec tv3;
@@ -377,11 +378,13 @@ static void internal_add_timer(struct tv
 {
 	__internal_add_timer(base, timer);
 	/*
-	 * Update base->next_timer if this is the earliest one.
+	 * Update base->active_timers and base->next_timer
 	 */
-	if (time_before(timer->expires, base->next_timer) &&
-	    !tbase_get_deferrable(timer->base))
-		base->next_timer = timer->expires;
+	if (!tbase_get_deferrable(timer->base)) {
+		if (time_before(timer->expires, base->next_timer))
+			base->next_timer = timer->expires;
+		base->active_timers++;
+	}
 }
 
 #ifdef CONFIG_TIMER_STATS
@@ -678,6 +681,14 @@ static inline void detach_timer(struct t
 	entry->prev = LIST_POISON2;
 }
 
+static inline void
+detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
+{
+	detach_timer(timer, true);
+	if (!tbase_get_deferrable(timer->base))
+		timer->base->active_timers--;
+}
+
 static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
 			     bool clear_pending)
 {
@@ -685,9 +696,11 @@ static int detach_if_pending(struct time
 		return 0;
 
 	detach_timer(timer, clear_pending);
-	if (timer->expires == base->next_timer &&
-	    !tbase_get_deferrable(timer->base))
-		base->next_timer = base->timer_jiffies;
+	if (!tbase_get_deferrable(timer->base)) {
+		timer->base->active_timers--;
+		if (timer->expires == base->next_timer)
+			base->next_timer = base->timer_jiffies;
+	}
 	return 1;
 }
 
@@ -1175,7 +1188,7 @@ static inline void __run_timers(struct t
 			timer_stats_account_timer(timer);
 
 			base->running_timer = timer;
-			detach_timer(timer, true);
+			detach_expired_timer(timer, base);
 
 			spin_unlock_irq(&base->lock);
 			call_timer_fn(timer, fn, data);
@@ -1701,6 +1714,7 @@ static int __cpuinit init_timers_cpu(int
 
 	base->timer_jiffies = jiffies;
 	base->next_timer = base->timer_jiffies;
+	base->active_timers = 0;
 	return 0;
 }
 
@@ -1711,6 +1725,7 @@ static void migrate_timer_list(struct tv
 
 	while (!list_empty(head)) {
 		timer = list_first_entry(head, struct timer_list, entry);
+		/* We ignore the accounting on the dying cpu */
 		detach_timer(timer, false);
 		timer_set_base(timer, new_base);
 		internal_add_timer(new_base, timer);



^ permalink raw reply	[flat|nested] 12+ messages in thread

* [patch 4/4] timers: Improve get_next_timer_interrupt()
  2012-05-25 22:08 [patch 0/4] timers: Fix get_next_timer_interrupt() proper Thomas Gleixner
                   ` (2 preceding siblings ...)
  2012-05-25 22:08 ` [patch 3/4] timers: Add accounting of non deferrable timers Thomas Gleixner
@ 2012-05-25 22:08 ` Thomas Gleixner
  2012-06-06 12:01   ` [tip:timers/core] " tip-bot for Thomas Gleixner
  3 siblings, 1 reply; 12+ messages in thread
From: Thomas Gleixner @ 2012-05-25 22:08 UTC (permalink / raw)
  To: LKML; +Cc: Peter Zijlstra, Gilad Ben-Yossef, Frederic Weisbecker

[-- Attachment #1: timers-fix-get-next-interrupt.patch --]
[-- Type: text/plain, Size: 2561 bytes --]

Gilad reported at

 http://lkml.kernel.org/r/1336056962-10465-2-git-send-email-gilad@benyossef.com

"Current timer code fails to correctly return a value meaning that
 there is no future timer event, with the result that the timer keeps
 getting re-armed in HZ one shot mode even when we could turn it off,
 generating unneeded interrupts.

 What is happening is that when __next_timer_interrupt() wishes
 to return a value that signifies "there is no future timer
 event", it returns (base->timer_jiffies + NEXT_TIMER_MAX_DELTA).

 However, the code in tick_nohz_stop_sched_tick(), which called
 __next_timer_interrupt() via get_next_timer_interrupt(),
 compares the return value to (last_jiffies + NEXT_TIMER_MAX_DELTA)
 to see if the timer needs to be re-armed.

 base->timer_jiffies != last_jiffies and so tick_nohz_stop_sched_tick()
 interperts the return value as indication that there is a distant
 future event 12 days from now and programs the timer to fire next
 after KTIME_MAX nsecs instead of avoiding to arm it. This ends up
 causing a needless interrupt once every KTIME_MAX nsecs."

Fix this by using the new active timer accounting. This avoids scans
when no active timer is enqueued completely, so we don't have to rely
on base->timer_next and base->timer_jiffies anymore.

Reported-by: Gilad Ben-Yossef <gilad@benyossef.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/timer.c |   13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

Index: tip/kernel/timer.c
===================================================================
--- tip.orig/kernel/timer.c
+++ tip/kernel/timer.c
@@ -1326,18 +1326,21 @@ static unsigned long cmp_next_hrtimer_ev
 unsigned long get_next_timer_interrupt(unsigned long now)
 {
 	struct tvec_base *base = __this_cpu_read(tvec_bases);
-	unsigned long expires;
+	unsigned long expires = now + NEXT_TIMER_MAX_DELTA;
 
 	/*
 	 * Pretend that there is no timer pending if the cpu is offline.
 	 * Possible pending timers will be migrated later to an active cpu.
 	 */
 	if (cpu_is_offline(smp_processor_id()))
-		return now + NEXT_TIMER_MAX_DELTA;
+		return expires;
+
 	spin_lock(&base->lock);
-	if (time_before_eq(base->next_timer, base->timer_jiffies))
-		base->next_timer = __next_timer_interrupt(base);
-	expires = base->next_timer;
+	if (base->active_timers) {
+		if (time_before_eq(base->next_timer, base->timer_jiffies))
+			base->next_timer = __next_timer_interrupt(base);
+		expires = base->next_timer;
+	}
 	spin_unlock(&base->lock);
 
 	if (time_before_eq(expires, now))



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [patch 2/4] timers: Consolidate base->next_timer update
  2012-05-25 22:08 ` [patch 2/4] timers: Consolidate base->next_timer update Thomas Gleixner
@ 2012-05-29  6:34   ` Nikunj A Dadhania
  2012-05-29  9:38     ` Thomas Gleixner
  2012-06-06 11:59   ` [tip:timers/core] " tip-bot for Thomas Gleixner
  1 sibling, 1 reply; 12+ messages in thread
From: Nikunj A Dadhania @ 2012-05-29  6:34 UTC (permalink / raw)
  To: Thomas Gleixner, LKML
  Cc: Peter Zijlstra, Gilad Ben-Yossef, Frederic Weisbecker

> 
> -static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
> +static void
> +__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
>  {
>  	unsigned long expires = timer->expires;
>  	unsigned long idx = expires - base->timer_jiffies;
> @@ -372,6 +373,17 @@ static void internal_add_timer(struct tv
>  	list_add_tail(&timer->entry, vec);
>  }
> 
> +static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
> +{
> +	__internal_add_timer(base, timer);
> +	/*
> +	 * Update base->next_timer if this is the earliest one.
> +	 */
> +	if (time_before(timer->expires, base->next_timer) &&
> +	    !tbase_get_deferrable(timer->base))
> +		base->next_timer = timer->expires;
> +}
> +
>
Shouldn't this be like this?

+	/*
+	 * Update base->next_timer if this is the earliest one.
+	 */
+	if (time_before(timer->expires, base->next_timer) &&
+	    !tbase_get_deferrable(timer->base))
+		base->next_timer = timer->expires;
+	__internal_add_timer(base, timer);

As per the below code?

>  #ifdef CONFIG_TIMER_STATS
>  void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
>  {
> @@ -757,9 +769,6 @@ __mod_timer(struct timer_list *timer, un
>  	}
> 
>  	timer->expires = expires;
> -	if (time_before(timer->expires, base->next_timer) &&
> -	    !tbase_get_deferrable(timer->base))
> -		base->next_timer = timer->expires;
>  	internal_add_timer(base, timer);
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [patch 2/4] timers: Consolidate base->next_timer update
  2012-05-29  6:34   ` Nikunj A Dadhania
@ 2012-05-29  9:38     ` Thomas Gleixner
  2012-05-29 10:35       ` Nikunj A Dadhania
  0 siblings, 1 reply; 12+ messages in thread
From: Thomas Gleixner @ 2012-05-29  9:38 UTC (permalink / raw)
  To: Nikunj A Dadhania
  Cc: LKML, Peter Zijlstra, Gilad Ben-Yossef, Frederic Weisbecker

On Tue, 29 May 2012, Nikunj A Dadhania wrote:

> > 
> > -static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
> > +static void
> > +__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
> >  {
> >  	unsigned long expires = timer->expires;
> >  	unsigned long idx = expires - base->timer_jiffies;
> > @@ -372,6 +373,17 @@ static void internal_add_timer(struct tv
> >  	list_add_tail(&timer->entry, vec);
> >  }
> > 
> > +static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
> > +{
> > +	__internal_add_timer(base, timer);
> > +	/*
> > +	 * Update base->next_timer if this is the earliest one.
> > +	 */
> > +	if (time_before(timer->expires, base->next_timer) &&
> > +	    !tbase_get_deferrable(timer->base))
> > +		base->next_timer = timer->expires;
> > +}
> > +
> >
> Shouldn't this be like this?
> 
> +	/*
> +	 * Update base->next_timer if this is the earliest one.
> +	 */
> +	if (time_before(timer->expires, base->next_timer) &&
> +	    !tbase_get_deferrable(timer->base))
> +		base->next_timer = timer->expires;
> +	__internal_add_timer(base, timer);
> 
> As per the below code?

And why should this matter?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [patch 2/4] timers: Consolidate base->next_timer update
  2012-05-29  9:38     ` Thomas Gleixner
@ 2012-05-29 10:35       ` Nikunj A Dadhania
  0 siblings, 0 replies; 12+ messages in thread
From: Nikunj A Dadhania @ 2012-05-29 10:35 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: LKML, Peter Zijlstra, Gilad Ben-Yossef, Frederic Weisbecker

On Tue, 29 May 2012 11:38:27 +0200 (CEST), Thomas Gleixner <tglx@linutronix.de> wrote:
> On Tue, 29 May 2012, Nikunj A Dadhania wrote:
> 
> > > 
> > > -static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
> > > +static void
> > > +__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
> > >  {
> > >  	unsigned long expires = timer->expires;
> > >  	unsigned long idx = expires - base->timer_jiffies;
> > > @@ -372,6 +373,17 @@ static void internal_add_timer(struct tv
> > >  	list_add_tail(&timer->entry, vec);
> > >  }
> > > 
> > > +static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
> > > +{
> > > +	__internal_add_timer(base, timer);
> > > +	/*
> > > +	 * Update base->next_timer if this is the earliest one.
> > > +	 */
> > > +	if (time_before(timer->expires, base->next_timer) &&
> > > +	    !tbase_get_deferrable(timer->base))
> > > +		base->next_timer = timer->expires;
> > > +}
> > > +
> > >
> > Shouldn't this be like this?
> > 
> > +	/*
> > +	 * Update base->next_timer if this is the earliest one.
> > +	 */
> > +	if (time_before(timer->expires, base->next_timer) &&
> > +	    !tbase_get_deferrable(timer->base))
> > +		base->next_timer = timer->expires;
> > +	__internal_add_timer(base, timer);
> > 
> > As per the below code?
> 
> And why should this matter?
> 
Yes it does not matter, sorry for the noise.

Looking at the internal_add_timer(), there is no such dependency. I was
thinking that the base->next_timer is changed and would be used in
__internal_add_timer.

Nikunj




^ permalink raw reply	[flat|nested] 12+ messages in thread

* [tip:timers/core] timers: Create detach_if_pending() and use it
  2012-05-25 22:08 ` [patch 1/4] timers: Create detach_if_pending() and use it Thomas Gleixner
@ 2012-06-06 11:58   ` tip-bot for Thomas Gleixner
  0 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Thomas Gleixner @ 2012-06-06 11:58 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, fweisbec, peterz, tglx, gilad

Commit-ID:  ec44bc7acc3687ba6ae8154b4b5a845b70279237
Gitweb:     http://git.kernel.org/tip/ec44bc7acc3687ba6ae8154b4b5a845b70279237
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Fri, 25 May 2012 22:08:57 +0000
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 6 Jun 2012 13:49:01 +0200

timers: Create detach_if_pending() and use it

Most callers of detach_timer() have the same pattern around
them. Check whether the timer is pending and eventually updating
base->next_timer.

Create detach_if_pending() and replace the duplicated code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Gilad Ben-Yossef <gilad@benyossef.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20120525214819.131246037@linutronix.de

---
 kernel/timer.c |   56 +++++++++++++++++++++++---------------------------------
 1 files changed, 23 insertions(+), 33 deletions(-)

diff --git a/kernel/timer.c b/kernel/timer.c
index 6ec7e7e..0f70deb 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -654,8 +654,7 @@ void init_timer_deferrable_key(struct timer_list *timer,
 }
 EXPORT_SYMBOL(init_timer_deferrable_key);
 
-static inline void detach_timer(struct timer_list *timer,
-				int clear_pending)
+static inline void detach_timer(struct timer_list *timer, bool clear_pending)
 {
 	struct list_head *entry = &timer->entry;
 
@@ -667,6 +666,19 @@ static inline void detach_timer(struct timer_list *timer,
 	entry->prev = LIST_POISON2;
 }
 
+static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
+			     bool clear_pending)
+{
+	if (!timer_pending(timer))
+		return 0;
+
+	detach_timer(timer, clear_pending);
+	if (timer->expires == base->next_timer &&
+	    !tbase_get_deferrable(timer->base))
+		base->next_timer = base->timer_jiffies;
+	return 1;
+}
+
 /*
  * We are using hashed locking: holding per_cpu(tvec_bases).lock
  * means that all timers which are tied to this base via timer->base are
@@ -712,16 +724,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 
 	base = lock_timer_base(timer, &flags);
 
-	if (timer_pending(timer)) {
-		detach_timer(timer, 0);
-		if (timer->expires == base->next_timer &&
-		    !tbase_get_deferrable(timer->base))
-			base->next_timer = base->timer_jiffies;
-		ret = 1;
-	} else {
-		if (pending_only)
-			goto out_unlock;
-	}
+	ret = detach_if_pending(timer, base, false);
+	if (!ret && pending_only)
+		goto out_unlock;
 
 	debug_activate(timer, expires);
 
@@ -959,13 +964,7 @@ int del_timer(struct timer_list *timer)
 	timer_stats_timer_clear_start_info(timer);
 	if (timer_pending(timer)) {
 		base = lock_timer_base(timer, &flags);
-		if (timer_pending(timer)) {
-			detach_timer(timer, 1);
-			if (timer->expires == base->next_timer &&
-			    !tbase_get_deferrable(timer->base))
-				base->next_timer = base->timer_jiffies;
-			ret = 1;
-		}
+		ret = detach_if_pending(timer, base, true);
 		spin_unlock_irqrestore(&base->lock, flags);
 	}
 
@@ -990,19 +989,10 @@ int try_to_del_timer_sync(struct timer_list *timer)
 
 	base = lock_timer_base(timer, &flags);
 
-	if (base->running_timer == timer)
-		goto out;
-
-	timer_stats_timer_clear_start_info(timer);
-	ret = 0;
-	if (timer_pending(timer)) {
-		detach_timer(timer, 1);
-		if (timer->expires == base->next_timer &&
-		    !tbase_get_deferrable(timer->base))
-			base->next_timer = base->timer_jiffies;
-		ret = 1;
+	if (base->running_timer != timer) {
+		timer_stats_timer_clear_start_info(timer);
+		ret = detach_if_pending(timer, base, true);
 	}
-out:
 	spin_unlock_irqrestore(&base->lock, flags);
 
 	return ret;
@@ -1178,7 +1168,7 @@ static inline void __run_timers(struct tvec_base *base)
 			timer_stats_account_timer(timer);
 
 			base->running_timer = timer;
-			detach_timer(timer, 1);
+			detach_timer(timer, true);
 
 			spin_unlock_irq(&base->lock);
 			call_timer_fn(timer, fn, data);
@@ -1714,7 +1704,7 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea
 
 	while (!list_empty(head)) {
 		timer = list_first_entry(head, struct timer_list, entry);
-		detach_timer(timer, 0);
+		detach_timer(timer, false);
 		timer_set_base(timer, new_base);
 		if (time_before(timer->expires, new_base->next_timer) &&
 		    !tbase_get_deferrable(timer->base))

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [tip:timers/core] timers: Consolidate base->next_timer update
  2012-05-25 22:08 ` [patch 2/4] timers: Consolidate base->next_timer update Thomas Gleixner
  2012-05-29  6:34   ` Nikunj A Dadhania
@ 2012-06-06 11:59   ` tip-bot for Thomas Gleixner
  1 sibling, 0 replies; 12+ messages in thread
From: tip-bot for Thomas Gleixner @ 2012-06-06 11:59 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, fweisbec, peterz, tglx, gilad

Commit-ID:  facbb4a7efbd658046bf615f03cd97a1504785d8
Gitweb:     http://git.kernel.org/tip/facbb4a7efbd658046bf615f03cd97a1504785d8
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Fri, 25 May 2012 22:08:57 +0000
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 6 Jun 2012 13:49:01 +0200

timers: Consolidate base->next_timer update

Another bunch of mindlessly copied code. All callers of
internal_add_timer() except the recascading code updates
base->next_timer.

Move this into internal_add_timer() and let the cascading code call
__internal_add_timer().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Gilad Ben-Yossef <gilad@benyossef.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20120525214819.189946224@linutronix.de
---
 kernel/timer.c |   26 +++++++++++++++-----------
 1 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/kernel/timer.c b/kernel/timer.c
index 0f70deb..7207690 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -330,7 +330,8 @@ void set_timer_slack(struct timer_list *timer, int slack_hz)
 }
 EXPORT_SYMBOL_GPL(set_timer_slack);
 
-static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
+static void
+__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 {
 	unsigned long expires = timer->expires;
 	unsigned long idx = expires - base->timer_jiffies;
@@ -372,6 +373,17 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 	list_add_tail(&timer->entry, vec);
 }
 
+static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
+{
+	__internal_add_timer(base, timer);
+	/*
+	 * Update base->next_timer if this is the earliest one.
+	 */
+	if (time_before(timer->expires, base->next_timer) &&
+	    !tbase_get_deferrable(timer->base))
+		base->next_timer = timer->expires;
+}
+
 #ifdef CONFIG_TIMER_STATS
 void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
 {
@@ -757,9 +769,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 	}
 
 	timer->expires = expires;
-	if (time_before(timer->expires, base->next_timer) &&
-	    !tbase_get_deferrable(timer->base))
-		base->next_timer = timer->expires;
 	internal_add_timer(base, timer);
 
 out_unlock:
@@ -925,9 +934,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
 	spin_lock_irqsave(&base->lock, flags);
 	timer_set_base(timer, base);
 	debug_activate(timer, timer->expires);
-	if (time_before(timer->expires, base->next_timer) &&
-	    !tbase_get_deferrable(timer->base))
-		base->next_timer = timer->expires;
 	internal_add_timer(base, timer);
 	/*
 	 * Check whether the other CPU is idle and needs to be
@@ -1079,7 +1085,8 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
 	 */
 	list_for_each_entry_safe(timer, tmp, &tv_list, entry) {
 		BUG_ON(tbase_get_base(timer->base) != base);
-		internal_add_timer(base, timer);
+		/* No accounting, while moving them */
+		__internal_add_timer(base, timer);
 	}
 
 	return index;
@@ -1706,9 +1713,6 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea
 		timer = list_first_entry(head, struct timer_list, entry);
 		detach_timer(timer, false);
 		timer_set_base(timer, new_base);
-		if (time_before(timer->expires, new_base->next_timer) &&
-		    !tbase_get_deferrable(timer->base))
-			new_base->next_timer = timer->expires;
 		internal_add_timer(new_base, timer);
 	}
 }

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [tip:timers/core] timers: Add accounting of non deferrable timers
  2012-05-25 22:08 ` [patch 3/4] timers: Add accounting of non deferrable timers Thomas Gleixner
@ 2012-06-06 12:00   ` tip-bot for Thomas Gleixner
  0 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Thomas Gleixner @ 2012-06-06 12:00 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, fweisbec, peterz, tglx, gilad

Commit-ID:  99d5f3aac674fe081ffddd2dbb8946ccbc14c410
Gitweb:     http://git.kernel.org/tip/99d5f3aac674fe081ffddd2dbb8946ccbc14c410
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Fri, 25 May 2012 22:08:58 +0000
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 6 Jun 2012 13:49:01 +0200

timers: Add accounting of non deferrable timers

The code in get_next_timer_interrupt() is suboptimal as it has to run
through the cascade to find the next expiring timer. On a completely
idle core we should only do that when there is an active timer
enqueued and base->next_timer does not give us a fast answer.

Add accounting of the active timers to the now consolidated
attach/detach code. I deliberately avoided sanity checks because the
code is fully symetric and any fiddling with timers w/o using the API
functions will lead to cute explosions anyway. ulong is big enough
even on 32bit and if we really run into the situation to have more
than 1<<32 timers enqueued there, then we are definitely not in a
state to go idle and run through that code.

This allows us to fix another shortcoming of get_next_timer_interrupt().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Gilad Ben-Yossef <gilad@benyossef.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20120525214819.236377028@linutronix.de
---
 kernel/timer.c |   31 +++++++++++++++++++++++--------
 1 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/kernel/timer.c b/kernel/timer.c
index 7207690..7fada69 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -77,6 +77,7 @@ struct tvec_base {
 	struct timer_list *running_timer;
 	unsigned long timer_jiffies;
 	unsigned long next_timer;
+	unsigned long active_timers;
 	struct tvec_root tv1;
 	struct tvec tv2;
 	struct tvec tv3;
@@ -377,11 +378,13 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
 {
 	__internal_add_timer(base, timer);
 	/*
-	 * Update base->next_timer if this is the earliest one.
+	 * Update base->active_timers and base->next_timer
 	 */
-	if (time_before(timer->expires, base->next_timer) &&
-	    !tbase_get_deferrable(timer->base))
-		base->next_timer = timer->expires;
+	if (!tbase_get_deferrable(timer->base)) {
+		if (time_before(timer->expires, base->next_timer))
+			base->next_timer = timer->expires;
+		base->active_timers++;
+	}
 }
 
 #ifdef CONFIG_TIMER_STATS
@@ -678,6 +681,14 @@ static inline void detach_timer(struct timer_list *timer, bool clear_pending)
 	entry->prev = LIST_POISON2;
 }
 
+static inline void
+detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
+{
+	detach_timer(timer, true);
+	if (!tbase_get_deferrable(timer->base))
+		timer->base->active_timers--;
+}
+
 static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
 			     bool clear_pending)
 {
@@ -685,9 +696,11 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
 		return 0;
 
 	detach_timer(timer, clear_pending);
-	if (timer->expires == base->next_timer &&
-	    !tbase_get_deferrable(timer->base))
-		base->next_timer = base->timer_jiffies;
+	if (!tbase_get_deferrable(timer->base)) {
+		timer->base->active_timers--;
+		if (timer->expires == base->next_timer)
+			base->next_timer = base->timer_jiffies;
+	}
 	return 1;
 }
 
@@ -1175,7 +1188,7 @@ static inline void __run_timers(struct tvec_base *base)
 			timer_stats_account_timer(timer);
 
 			base->running_timer = timer;
-			detach_timer(timer, true);
+			detach_expired_timer(timer, base);
 
 			spin_unlock_irq(&base->lock);
 			call_timer_fn(timer, fn, data);
@@ -1701,6 +1714,7 @@ static int __cpuinit init_timers_cpu(int cpu)
 
 	base->timer_jiffies = jiffies;
 	base->next_timer = base->timer_jiffies;
+	base->active_timers = 0;
 	return 0;
 }
 
@@ -1711,6 +1725,7 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea
 
 	while (!list_empty(head)) {
 		timer = list_first_entry(head, struct timer_list, entry);
+		/* We ignore the accounting on the dying cpu */
 		detach_timer(timer, false);
 		timer_set_base(timer, new_base);
 		internal_add_timer(new_base, timer);

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [tip:timers/core] timers: Improve get_next_timer_interrupt()
  2012-05-25 22:08 ` [patch 4/4] timers: Improve get_next_timer_interrupt() Thomas Gleixner
@ 2012-06-06 12:01   ` tip-bot for Thomas Gleixner
  0 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Thomas Gleixner @ 2012-06-06 12:01 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, fweisbec, peterz, tglx, gilad

Commit-ID:  e40468a54882ef7411fb178dbf2e465ec2349af7
Gitweb:     http://git.kernel.org/tip/e40468a54882ef7411fb178dbf2e465ec2349af7
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Fri, 25 May 2012 22:08:59 +0000
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 6 Jun 2012 13:49:02 +0200

timers: Improve get_next_timer_interrupt()

Gilad reported at

 http://lkml.kernel.org/r/1336056962-10465-2-git-send-email-gilad@benyossef.com

"Current timer code fails to correctly return a value meaning that
 there is no future timer event, with the result that the timer keeps
 getting re-armed in HZ one shot mode even when we could turn it off,
 generating unneeded interrupts.

 What is happening is that when __next_timer_interrupt() wishes
 to return a value that signifies "there is no future timer
 event", it returns (base->timer_jiffies + NEXT_TIMER_MAX_DELTA).

 However, the code in tick_nohz_stop_sched_tick(), which called
 __next_timer_interrupt() via get_next_timer_interrupt(),
 compares the return value to (last_jiffies + NEXT_TIMER_MAX_DELTA)
 to see if the timer needs to be re-armed.

 base->timer_jiffies != last_jiffies and so tick_nohz_stop_sched_tick()
 interperts the return value as indication that there is a distant
 future event 12 days from now and programs the timer to fire next
 after KTIME_MAX nsecs instead of avoiding to arm it. This ends up
 causing a needless interrupt once every KTIME_MAX nsecs."

Fix this by using the new active timer accounting. This avoids scans
when no active timer is enqueued completely, so we don't have to rely
on base->timer_next and base->timer_jiffies anymore.

Reported-by: Gilad Ben-Yossef <gilad@benyossef.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/r/20120525214819.317535385@linutronix.de
---
 kernel/timer.c |   13 ++++++++-----
 1 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/kernel/timer.c b/kernel/timer.c
index 7fada69..a61c093 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1326,18 +1326,21 @@ static unsigned long cmp_next_hrtimer_event(unsigned long now,
 unsigned long get_next_timer_interrupt(unsigned long now)
 {
 	struct tvec_base *base = __this_cpu_read(tvec_bases);
-	unsigned long expires;
+	unsigned long expires = now + NEXT_TIMER_MAX_DELTA;
 
 	/*
 	 * Pretend that there is no timer pending if the cpu is offline.
 	 * Possible pending timers will be migrated later to an active cpu.
 	 */
 	if (cpu_is_offline(smp_processor_id()))
-		return now + NEXT_TIMER_MAX_DELTA;
+		return expires;
+
 	spin_lock(&base->lock);
-	if (time_before_eq(base->next_timer, base->timer_jiffies))
-		base->next_timer = __next_timer_interrupt(base);
-	expires = base->next_timer;
+	if (base->active_timers) {
+		if (time_before_eq(base->next_timer, base->timer_jiffies))
+			base->next_timer = __next_timer_interrupt(base);
+		expires = base->next_timer;
+	}
 	spin_unlock(&base->lock);
 
 	if (time_before_eq(expires, now))

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2012-06-06 12:01 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-05-25 22:08 [patch 0/4] timers: Fix get_next_timer_interrupt() proper Thomas Gleixner
2012-05-25 22:08 ` [patch 2/4] timers: Consolidate base->next_timer update Thomas Gleixner
2012-05-29  6:34   ` Nikunj A Dadhania
2012-05-29  9:38     ` Thomas Gleixner
2012-05-29 10:35       ` Nikunj A Dadhania
2012-06-06 11:59   ` [tip:timers/core] " tip-bot for Thomas Gleixner
2012-05-25 22:08 ` [patch 1/4] timers: Create detach_if_pending() and use it Thomas Gleixner
2012-06-06 11:58   ` [tip:timers/core] " tip-bot for Thomas Gleixner
2012-05-25 22:08 ` [patch 3/4] timers: Add accounting of non deferrable timers Thomas Gleixner
2012-06-06 12:00   ` [tip:timers/core] " tip-bot for Thomas Gleixner
2012-05-25 22:08 ` [patch 4/4] timers: Improve get_next_timer_interrupt() Thomas Gleixner
2012-06-06 12:01   ` [tip:timers/core] " tip-bot for Thomas Gleixner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox