From: Michael Wang <wangyun@linux.vnet.ibm.com>
To: LKML <linux-kernel@vger.kernel.org>,
Ingo Molnar <mingo@kernel.org>,
Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Turner <pjt@google.com>, Mike Galbraith <efault@gmx.de>,
Andrew Morton <akpm@linux-foundation.org>,
alex.shi@intel.com, Ram Pai <linuxram@us.ibm.com>,
"Nikunj A. Dadhania" <nikunj@linux.vnet.ibm.com>,
Namhyung Kim <namhyung@kernel.org>
Subject: [RFC PATCH v3 3/3] sched: simplify select_task_rq_fair() with schedule balance map
Date: Tue, 29 Jan 2013 17:10:45 +0800
Message-ID: <51079215.1070309@linux.vnet.ibm.com>
In-Reply-To: <51079178.3070002@linux.vnet.ibm.com>
Since the schedule balance map provides a way to obtain the proper sd
directly, the code of select_task_rq_fair() can be simplified.
The new code is designed to preserve most of the old logic, but gets rid
of the 'for' loops by using the schedule balance map to locate the proper
sd directly.
Signed-off-by: Michael Wang <wangyun@linux.vnet.ibm.com>
---
kernel/sched/fair.c | 135 ++++++++++++++++++++++++++++-----------------------
1 files changed, 74 insertions(+), 61 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5eea870..0935c7d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3302,100 +3302,113 @@ done:
}
/*
- * sched_balance_self: balance the current task (running on cpu) in domains
- * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
- * SD_BALANCE_EXEC.
+ * select_task_rq_fair()
+ * select a proper cpu for the task to run on.
*
- * Balance, ie. select the least loaded group.
- *
- * Returns the target CPU number, or the same CPU if no balancing is needed.
- *
- * preempt must be disabled.
+ * p -- the task we are going to select a cpu for
+ * sd_flag -- indicates the context: WAKE, EXEC or FORK
+ * wake_flags -- we only care about WF_SYNC currently
*/
static int
select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
{
- struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
+ struct sched_domain *sd = NULL;
int cpu = smp_processor_id();
int prev_cpu = task_cpu(p);
int new_cpu = cpu;
- int want_affine = 0;
int sync = wake_flags & WF_SYNC;
+ struct sched_balance_map *sbm = NULL;
+ int type = 0;
if (p->nr_cpus_allowed == 1)
return prev_cpu;
- if (sd_flag & SD_BALANCE_WAKE) {
- if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
- want_affine = 1;
- new_cpu = prev_cpu;
- }
+ if (sd_flag & SD_BALANCE_EXEC)
+ type = SBM_EXEC_TYPE;
+ else if (sd_flag & SD_BALANCE_FORK)
+ type = SBM_FORK_TYPE;
+ else if (sd_flag & SD_BALANCE_WAKE)
+ type = SBM_WAKE_TYPE;
rcu_read_lock();
- for_each_domain(cpu, tmp) {
- if (!(tmp->flags & SD_LOAD_BALANCE))
- continue;
+ sbm = cpu_rq(cpu)->sbm;
+ if (!sbm)
+ goto unlock;
+
+ if (sd_flag & SD_BALANCE_WAKE) {
/*
- * If both cpu and prev_cpu are part of this domain,
- * cpu is a valid SD_WAKE_AFFINE target.
+ * Tasks to be woken are special: the memory they rely on
+ * may already be cached on prev_cpu, and they usually
+ * require low latency.
+ *
+ * So first try to locate an idle cpu sharing a cache
+ * with prev_cpu. This has a chance of breaking the load
+ * balance; fortunately, select_idle_sibling() searches
+ * from top to bottom, which helps reduce that chance in
+ * some cases.
*/
- if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
- cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
- affine_sd = tmp;
- break;
- }
+ new_cpu = select_idle_sibling(p, prev_cpu);
+ if (idle_cpu(new_cpu))
+ goto unlock;
- if (tmp->flags & sd_flag)
- sd = tmp;
- }
+ /*
+ * No idle cpu could be found in the topology of prev_cpu,
+ * so try searching again in the topology of the current
+ * cpu, if it is an affine target of prev_cpu.
+ */
+ if (cpu == prev_cpu || !sbm->affine_map[prev_cpu] ||
+ !cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
+ goto unlock;
- if (affine_sd) {
- if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
- prev_cpu = cpu;
+ new_cpu = select_idle_sibling(p, cpu);
- new_cpu = select_idle_sibling(p, prev_cpu);
+ /*
+ * Invoke wake_affine() last, since it is without doubt
+ * a performance killer.
+ */
+ if (idle_cpu(new_cpu) &&
+ wake_affine(sbm->affine_map[prev_cpu], p, sync))
+ goto unlock;
+
+ /*
+ * Failed to locate an idle cpu in the topology of either
+ * cpu or prev_cpu; since the benefit of balancing cannot
+ * be estimated, just fall back to prev_cpu.
+ */
+ new_cpu = prev_cpu;
goto unlock;
}
+ /* Balance path, only for FORK and EXEC. */
+ new_cpu = (sd_flag & SD_BALANCE_WAKE) ? prev_cpu : cpu;
+ sd = sbm->sd[type][sbm->top_level[type]];
+
while (sd) {
int load_idx = sd->forkexec_idx;
- struct sched_group *group;
- int weight;
-
- if (!(sd->flags & sd_flag)) {
- sd = sd->child;
- continue;
- }
+ struct sched_group *sg = NULL;
if (sd_flag & SD_BALANCE_WAKE)
load_idx = sd->wake_idx;
- group = find_idlest_group(sd, p, cpu, load_idx);
- if (!group) {
- sd = sd->child;
- continue;
- }
+ sg = find_idlest_group(sd, p, cpu, load_idx);
+ if (!sg)
+ goto next_sd;
- new_cpu = find_idlest_cpu(group, p, cpu);
- if (new_cpu == -1 || new_cpu == cpu) {
- /* Now try balancing at a lower domain level of cpu */
- sd = sd->child;
- continue;
- }
+ new_cpu = find_idlest_cpu(sg, p, cpu);
+ if (new_cpu != -1)
+ cpu = new_cpu;
+next_sd:
+ if (!sd->level)
+ break;
- /* Now try balancing at a lower domain level of new_cpu */
- cpu = new_cpu;
- weight = sd->span_weight;
- sd = NULL;
- for_each_domain(cpu, tmp) {
- if (weight <= tmp->span_weight)
- break;
- if (tmp->flags & sd_flag)
- sd = tmp;
- }
- /* while loop will break here if sd == NULL */
+ sbm = cpu_rq(cpu)->sbm;
+ if (!sbm)
+ break;
+
+ sd = sbm->sd[type][sd->level - 1];
}
+
unlock:
rcu_read_unlock();
--
1.7.4.1
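The balance path's descent to lower domains also becomes a direct index: instead of rescanning for_each_domain() on the new cpu and comparing span_weight, the next-lower domain is taken from sbm->sd[type][sd->level - 1]. A self-contained sketch of that descent, again with toy stand-ins for the kernel structures:

```c
#include <assert.h>
#include <stddef.h>

#define NR_LEVELS 3
enum { SBM_FORK_TYPE, SBM_MAX_TYPE };

struct sched_domain {
	int level;			/* 0 = lowest in the hierarchy */
};

struct sched_balance_map {
	struct sched_domain *sd[SBM_MAX_TYPE][NR_LEVELS];
};

/*
 * Walk from 'sd' down to level 0, recording each level visited.
 * This mirrors the new while loop: the next-lower domain is a
 * direct array index, not another for_each_domain() rescan.
 */
int descend(struct sched_balance_map *sbm, struct sched_domain *sd,
	    int *visited)
{
	int n = 0;

	while (sd) {
		visited[n++] = sd->level;
		if (!sd->level)
			break;		/* bottom of the hierarchy */
		sd = sbm->sd[SBM_FORK_TYPE][sd->level - 1];
	}
	return n;
}

static struct sched_domain d[NR_LEVELS];
static struct sched_balance_map m;

/* Map every level for the FORK type and return the top domain. */
struct sched_domain *build_map(void)
{
	int i;

	for (i = 0; i < NR_LEVELS; i++) {
		d[i].level = i;
		m.sd[SBM_FORK_TYPE][i] = &d[i];
	}
	return &d[NR_LEVELS - 1];
}
```

Starting from the top domain, the loop visits levels 2, 1, 0 with one array lookup per step, matching the new while loop in the patch.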