[PATCH 6/8] RT: Optimize our cpu selection based on topology

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Gregory Haskins <ghaskins@novell.com>
To: rostedt@goodmis.org
Cc: mingo@elte.hu, dvhltc@us.ibm.com, zijlstra@redhat.com,
	linux-kernel@vger.kernel.org, linux-rt-users@vger.kernel.org,
	ghaskins@novell.com
Subject: [PATCH 6/8] RT: Optimize our cpu selection based on topology
Date: Mon, 05 Nov 2007 16:49:03 -0700	[thread overview]
Message-ID: <20071105234903.25451.30047.stgit@lsg> (raw)
In-Reply-To: <20071105234832.25451.55430.stgit@lsg>

The current code base assumes a relatively flat CPU/core topology and will
route RT tasks to any CPU fairly equally.  In the real world, there are
various toplogies and affinities that govern where a task is best suited to
run with the smallest amount of overhead.  NUMA and multi-core CPUs are
prime examples of topologies that can impact cache performance.

Fortunately, linux is already structured to represent these topologies via
the sched_domains interface.  So we change our RT router to consult a
combination of topology and affinity policy to best place tasks during
migration.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
---

 kernel/sched.c    |    1 +
 kernel/sched_rt.c |   99 +++++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 88 insertions(+), 12 deletions(-)

diff --git a/kernel/sched.c b/kernel/sched.c
index d16c686..8a27f09 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -26,6 +26,7 @@
  *  2007-07-01  Group scheduling enhancements by Srivatsa Vaddagiri
  *  2007-10-22  RT overload balancing by Steven Rostedt
  *                 (with thanks to Gregory Haskins)
+ *  2007-11-05  RT migration/wakeup tuning by Gregory Haskins
  */
 
 #include <linux/mm.h>
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 43d4ea6..6ba5921 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -331,34 +331,109 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq,
 	return next;
 }
 
-static int find_lowest_rq(struct task_struct *task)
+static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
 {
-	int cpu;
-	cpumask_t cpu_mask;
-	struct rq *lowest_rq = NULL;
+	int       cpu;
+	cpumask_t valid_mask;
+	int       lowest_prio = -1;
+	int       ret         = 0;
 
-	cpus_and(cpu_mask, cpu_online_map, task->cpus_allowed);
+	cpus_clear(*lowest_mask);
+	cpus_and(valid_mask, cpu_online_map, task->cpus_allowed);
 
 	/*
 	 * Scan each rq for the lowest prio.
 	 */
-	for_each_cpu_mask(cpu, cpu_mask) {
+	for_each_cpu_mask(cpu, valid_mask) {
 		struct rq *rq = cpu_rq(cpu);
 
 		/* We look for lowest RT prio or non-rt CPU */
 		if (rq->rt.highest_prio >= MAX_RT_PRIO) {
-			lowest_rq = rq;
-			break;
+			if (ret)
+				cpus_clear(*lowest_mask);
+			cpu_set(rq->cpu, *lowest_mask);
+			return 1;
 		}
 
 		/* no locking for now */
-		if (rq->rt.highest_prio > task->prio &&
-		    (!lowest_rq || rq->rt.highest_prio > lowest_rq->rt.highest_prio)) {
-			lowest_rq = rq;
+		if ((rq->rt.highest_prio > task->prio)
+		    && (rq->rt.highest_prio >= lowest_prio)) {
+			if (rq->rt.highest_prio > lowest_prio) {
+				/* new low - clear old data */
+				lowest_prio = rq->rt.highest_prio;
+				cpus_clear(*lowest_mask);
+			}
+			cpu_set(rq->cpu, *lowest_mask);
+			ret = 1;
+		}
+	}
+
+	return ret;
+}
+
+static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
+{
+	int first;
+
+	/* "this_cpu" is cheaper to preempt than a remote processor */
+	if ((this_cpu != -1) && cpu_isset(this_cpu, *mask))
+		return this_cpu;
+
+	first = first_cpu(*mask);
+	if (first != NR_CPUS)
+		return first;
+
+	return -1;
+}
+
+static int find_lowest_rq(struct task_struct *task)
+{
+	struct sched_domain *sd;
+	cpumask_t lowest_mask;
+	int this_cpu = smp_processor_id();
+	int cpu      = task_cpu(task);
+
+	if (!find_lowest_cpus(task, &lowest_mask))
+		return -1;
+
+	/*
+	 * At this point we have built a mask of cpus representing the
+	 * lowest priority tasks in the system.  Now we want to elect
+	 * the best one based on our affinity and topology.
+	 *
+	 * We prioritize the last cpu that the task executed on since
+	 * it is most likely cache-hot in that location.
+	 */
+	if (cpu_isset(cpu, lowest_mask))
+		return cpu;
+
+	/*
+	 * Otherwise, we consult the sched_domains span maps to figure
+	 * out which cpu is logically closest to our hot cache data.
+	 */
+	if (this_cpu == cpu)
+		this_cpu = -1; /* Skip this_cpu opt if the same */
+
+	for_each_domain(cpu, sd) {
+		if (sd->flags & SD_WAKE_AFFINE) {
+			cpumask_t domain_mask;
+			int       best_cpu;
+
+			cpus_and(domain_mask, sd->span, lowest_mask);
+
+			best_cpu = pick_optimal_cpu(this_cpu,
+						    &domain_mask);
+			if (best_cpu != -1)
+				return best_cpu;
 		}
 	}
 
-	return lowest_rq ? lowest_rq->cpu : -1;
+	/*
+	 * And finally, if there were no matches within the domains
+	 * just give the caller *something* to work with from the compatible
+	 * locations.
+	 */
+	return pick_optimal_cpu(this_cpu, &lowest_mask);
 }
 
 /* Will lock the rq it finds */

next prev parent reply	other threads:[~2007-11-06  0:17 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-11-05 23:48 [PATCH 0/8] RT: scheduler migration/wakeup enhancements Gregory Haskins
2007-11-05 23:48 ` [PATCH 1/8] RT: Consistency cleanup for this_rq usage Gregory Haskins
2007-11-05 23:48 ` [PATCH 2/8] RT: Remove some CFS specific code from the wakeup path of RT tasks Gregory Haskins
2007-11-05 23:48 ` [PATCH 3/8] RT: Break out the search function Gregory Haskins
2007-11-05 23:48 ` [PATCH 4/8] RT: Allow current_cpu to be included in search Gregory Haskins
2007-11-05 23:48 ` [PATCH 5/8] RT: Pre-route RT tasks on wakeup Gregory Haskins
2007-11-05 23:49 ` Gregory Haskins [this message]
2007-11-05 23:49 ` [PATCH 7/8] RT: Optimize rebalancing Gregory Haskins
2007-11-05 23:49 ` [PATCH 8/8] RT: Use a 2-d bitmap for searching lowest-pri CPU Gregory Haskins
  -- strict thread matches above, loose matches on Subject: below --
2007-11-05 23:45 [PATCH 0/8] RT: scheduler migration/wakeup enhancements Gregory Haskins
2007-11-05 23:45 ` [PATCH 6/8] RT: Optimize our cpu selection based on topology Gregory Haskins

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:d16c686 dfblob:8a27f09 dfblob:43d4ea6 dfblob:6ba5921 )
 OR (
bs:"[PATCH 6/8] RT: Optimize our cpu selection based on topology" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20071105234903.25451.30047.stgit@lsg \
    --to=ghaskins@novell.com \
    --cc=dvhltc@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rt-users@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=rostedt@goodmis.org \
    --cc=zijlstra@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.