public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
To: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org,
	linux-pm@lists.linux-foundation.org, a.p.zijlstra@chello.nl,
	ego@in.ibm.com, mingo@elte.hu, andi@firstfloor.org,
	venkatesh.pallipadi@intel.com, vatsa@linux.vnet.ibm.com,
	arjan@infradead.org, svaidy@linux.vnet.ibm.com,
	Arun Bharadwaj <arun@linux.vnet.ibm.com>
Subject: Re: [v4 RFC PATCH 4/4] timers: logic to move non pinned timers
Date: Mon, 6 Apr 2009 16:12:28 +0530	[thread overview]
Message-ID: <20090406104228.GB17412@linux.vnet.ibm.com> (raw)
In-Reply-To: <20090406051656.GA17412@linux.vnet.ibm.com>

* Arun R Bharadwaj <arun@linux.vnet.ibm.com> [2009-04-06 10:46:56]:

This is a re-post of the patch after implementing the changes
suggested by Thomas.

---

This patch migrates all non-pinned timers and hrtimers from idle CPUs
to the current idle load balancer. Timers firing on busy CPUs are not
migrated.

While migrating hrtimers, care must be taken that the migration does
not add latency. So we compare the expiry of the hrtimer with the next
timer interrupt on the target cpu and migrate the hrtimer only if it
expires *after* the next interrupt on the target cpu. To support this,
add a clockevents_get_next_event() helper function that returns the
next_event of the target cpu's clock_event_device.

Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
---
 include/linux/sched.h |   12 ++++++++++++
 kernel/hrtimer.c      |   50 +++++++++++++++++++++++++++++++++++++++++++++++++-
 kernel/sched.c        |    5 +++++
 kernel/timer.c        |   12 +++++++++++-
 4 files changed, 77 insertions(+), 2 deletions(-)

Index: linux.trees.git/kernel/timer.c
===================================================================
--- linux.trees.git.orig/kernel/timer.c
+++ linux.trees.git/kernel/timer.c
@@ -37,6 +37,7 @@
 #include <linux/delay.h>
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
+#include <linux/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -606,7 +607,7 @@ __mod_timer(struct timer_list *timer, un
 {
 	struct tvec_base *base, *new_base;
 	unsigned long flags;
-	int ret;
+	int ret, preferred_cpu, cpu;
 
 	ret = 0;
 
@@ -627,6 +628,15 @@ __mod_timer(struct timer_list *timer, un
 
 	new_base = __get_cpu_var(tvec_bases);
 
+	cpu = smp_processor_id();
+	if (get_sysctl_timer_migration() && idle_cpu(cpu) && !pinned) {
+		preferred_cpu = get_nohz_load_balancer();
+		if (preferred_cpu >= 0)
+			cpu = preferred_cpu;
+	}
+
+	new_base = per_cpu(tvec_bases, cpu);
+
 	if (base != new_base) {
 		/*
 		 * We are trying to schedule the timer on the local CPU.
Index: linux.trees.git/kernel/hrtimer.c
===================================================================
--- linux.trees.git.orig/kernel/hrtimer.c
+++ linux.trees.git/kernel/hrtimer.c
@@ -43,6 +43,8 @@
 #include <linux/seq_file.h>
 #include <linux/err.h>
 #include <linux/debugobjects.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
 
 #include <asm/uaccess.h>
 
@@ -189,6 +191,17 @@ struct hrtimer_clock_base *lock_hrtimer_
 	}
 }
 
+ktime_t clockevents_get_next_event(int cpu)
+{
+	struct tick_device *td;
+	struct clock_event_device *dev;
+
+	td = &per_cpu(tick_cpu_device, cpu);
+	dev = td->evtdev;
+
+	return dev->next_event;
+}
+
 /*
  * Switch the timer base to the current CPU when possible.
  */
@@ -198,8 +211,17 @@ switch_hrtimer_base(struct hrtimer *time
 {
 	struct hrtimer_clock_base *new_base;
 	struct hrtimer_cpu_base *new_cpu_base;
+	int cpu, preferred_cpu = -1;
+
+	cpu = smp_processor_id();
+	if (get_sysctl_timer_migration() && !pinned && idle_cpu(cpu)) {
+		preferred_cpu = get_nohz_load_balancer();
+		if (preferred_cpu >= 0)
+			cpu = preferred_cpu;
+	}
 
-	new_cpu_base = &__get_cpu_var(hrtimer_bases);
+again:
+	new_cpu_base = &per_cpu(hrtimer_bases, cpu);
 	new_base = &new_cpu_base->clock_base[base->index];
 
 	if (base != new_base) {
@@ -220,6 +242,32 @@ switch_hrtimer_base(struct hrtimer *time
 		spin_unlock(&base->cpu_base->lock);
 		spin_lock(&new_base->cpu_base->lock);
 		timer->base = new_base;
+
+		if (cpu == preferred_cpu) {
+			/* Calculate clock monotonic expiry time */
+			ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
+							new_base->offset);
+
+			/*
+			 * Get the next event on target cpu from the
+			 * clock events layer.
+			 * This covers the highres=off nohz=on case as well.
+			 */
+			ktime_t next = clockevents_get_next_event(cpu);
+
+			ktime_t delta = ktime_sub(expires, next);
+
+			/*
+			 * We do not migrate the timer when it is expiring
+			 * before the next event on the target cpu because
+			 * we cannot reprogram the target cpu hardware and
+			 * we would cause it to fire late.
+			 */
+			if (delta.tv64 < 0) {
+				cpu = smp_processor_id();
+				goto again;
+			}
+		}
 	}
 	return new_base;
 }
Index: linux.trees.git/include/linux/sched.h
===================================================================
--- linux.trees.git.orig/include/linux/sched.h
+++ linux.trees.git/include/linux/sched.h
@@ -265,6 +265,7 @@ static inline int select_nohz_load_balan
 }
 #endif
 
+extern int get_nohz_load_balancer(void);
 /*
  * Only dump TASK_* tasks. (0 for all tasks)
  */
@@ -1769,6 +1770,17 @@ int sched_nr_latency_handler(struct ctl_
 		struct file *file, void __user *buffer, size_t *length,
 		loff_t *ppos);
 #endif
+#ifdef CONFIG_SCHED_DEBUG
+static inline int get_sysctl_timer_migration(void)
+{
+	return sysctl_timer_migration;
+}
+#else
+static inline int get_sysctl_timer_migration(void)
+{
+	return 1;
+}
+#endif
 extern unsigned int sysctl_sched_rt_period;
 extern int sysctl_sched_rt_runtime;
 
Index: linux.trees.git/kernel/sched.c
===================================================================
--- linux.trees.git.orig/kernel/sched.c
+++ linux.trees.git/kernel/sched.c
@@ -4009,6 +4009,11 @@ static struct {
 	.load_balancer = ATOMIC_INIT(-1),
 };
 
+int get_nohz_load_balancer(void)
+{
+	return atomic_read(&nohz.load_balancer);
+}
+
 /*
  * This routine will try to nominate the ilb (idle load balancing)
  * owner among the cpus whose ticks are stopped. ilb owner will do the idle

  reply	other threads:[~2009-04-06 10:43 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-04-01 11:31 [v4 RFC PATCH 0/4] timers: Framework for migration of timers Arun R Bharadwaj
2009-04-01 11:32 ` [v4 RFC PATCH 1/4] timers: Framework for identifying pinned timers Arun R Bharadwaj
2009-04-01 11:41   ` Andi Kleen
2009-04-02  5:09     ` Arun R Bharadwaj
2009-04-01 11:34 ` [v4 RFC PATCH 2/4] timers: Identifying the existing " Arun R Bharadwaj
2009-04-01 11:36 ` [v4 RFC PATCH 3/4] timers: /proc/sys sysctl hook to enable timer migration Arun R Bharadwaj
2009-04-01 11:37 ` [v4 RFC PATCH 4/4] timers: logic to move non pinned timers Arun R Bharadwaj
2009-04-01 11:46   ` Arun R Bharadwaj
2009-04-03 21:52   ` Thomas Gleixner
2009-04-06  5:16     ` Arun R Bharadwaj
2009-04-06 10:42       ` Arun R Bharadwaj [this message]
2009-04-06 10:56         ` Thomas Gleixner
2009-04-06 15:28           ` Arun R Bharadwaj
2009-04-06 15:31             ` Arun R Bharadwaj
2009-04-06 15:35             ` Thomas Gleixner
2009-04-06 16:00               ` Arun R Bharadwaj

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090406104228.GB17412@linux.vnet.ibm.com \
    --to=arun@linux.vnet.ibm.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=andi@firstfloor.org \
    --cc=arjan@infradead.org \
    --cc=ego@in.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@lists.linux-foundation.org \
    --cc=mingo@elte.hu \
    --cc=svaidy@linux.vnet.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=vatsa@linux.vnet.ibm.com \
    --cc=venkatesh.pallipadi@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox