All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: mingo@kernel.org, pjt@google.com, vatsa@in.ibm.com,
	suresh.b.siddha@intel.com, efault@gmx.de
Cc: linux-kernel@vger.kernel.org, Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [RFC][PATCH 2/5] sched, fair: Add some serialization to the sched_domain load-balance walk
Date: Tue, 01 May 2012 20:14:32 +0200	[thread overview]
Message-ID: <20120501182610.803671590@chello.nl> (raw)
In-Reply-To: 20120501181430.007891123@chello.nl

[-- Attachment #1: sched-balance-serialize.patch --]
[-- Type: text/plain, Size: 2296 bytes --]

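Since the sched_domain walk is completely unserialized (!SD_SERIALIZE),
it is possible for multiple CPUs in the same group to be elected to
balance the next level. Avoid this by adding some serialization: the
elected CPU claims its group by cmpxchg()ing sched_group::balance_cpu
from -1 to its own id, and rebalance_domains() releases the claim again
once its domain walk is done.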

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/sched.h |    1 +
 kernel/sched/core.c   |    2 ++
 kernel/sched/fair.c   |    9 +++++++--
 3 files changed, 10 insertions(+), 2 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -927,6 +927,7 @@ struct sched_group_power {
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
 	atomic_t ref;
+	int balance_cpu;
 
 	unsigned int group_weight;
 	struct sched_group_power *sgp;
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6057,6 +6057,7 @@ build_overlap_sched_groups(struct sched_
 
 		sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
 		atomic_inc(&sg->sgp->ref);
+		sg->balance_cpu = -1;
 
 		if (cpumask_test_cpu(cpu, sg_span))
 			groups = sg;
@@ -6132,6 +6133,7 @@ build_sched_groups(struct sched_domain *
 
 		cpumask_clear(sched_group_cpus(sg));
 		sg->sgp->power = 0;
+		sg->balance_cpu = -1;
 
 		for_each_cpu(j, span) {
 			if (get_group(j, sdd, NULL) != group)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3831,7 +3831,8 @@ static inline void update_sg_lb_stats(st
 	 */
 	if (local_group) {
 		if (idle != CPU_NEWLY_IDLE) {
-			if (balance_cpu != this_cpu) {
+			if (balance_cpu != this_cpu ||
+			    cmpxchg(&group->balance_cpu, -1, balance_cpu) != -1) {
 				*balance = 0;
 				return;
 			}
@@ -4933,7 +4934,7 @@ static void rebalance_domains(int cpu, e
 	int balance = 1;
 	struct rq *rq = cpu_rq(cpu);
 	unsigned long interval;
-	struct sched_domain *sd;
+	struct sched_domain *sd, *last = NULL;
 	/* Earliest time when we have to do rebalance again */
 	unsigned long next_balance = jiffies + 60*HZ;
 	int update_next_balance = 0;
@@ -4943,6 +4944,7 @@ static void rebalance_domains(int cpu, e
 
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
+		last = sd;
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
 
@@ -4987,6 +4989,9 @@ static void rebalance_domains(int cpu, e
 		if (!balance)
 			break;
 	}
+	for (sd = last; sd; sd = sd->child)
+		(void)cmpxchg(&sd->groups->balance_cpu, cpu, -1);
+
 	rcu_read_unlock();
 
 	/*



  parent reply	other threads:[~2012-05-01 18:29 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-05-01 18:14 [RFC][PATCH 0/5] various sched and numa bits Peter Zijlstra
2012-05-01 18:14 ` [RFC][PATCH 1/5] sched, fair: Let minimally loaded cpu balance the group Peter Zijlstra
2012-05-02 10:25   ` Srivatsa Vaddagiri
2012-05-02 10:31     ` Peter Zijlstra
2012-05-02 10:34       ` Srivatsa Vaddagiri
2012-05-04  0:05         ` Suresh Siddha
2012-05-04 16:09           ` Peter Zijlstra
2012-05-01 18:14 ` Peter Zijlstra [this message]
2012-05-01 18:14 ` [RFC][PATCH 3/5] x86: Allow specifying node_distance() for numa=fake Peter Zijlstra
2012-05-01 18:14 ` [RFC][PATCH 4/5] x86: Hard partition cpu topology masks on node boundaries Peter Zijlstra
2012-05-01 18:14 ` [RFC][PATCH 5/5] sched: Rewrite the CONFIG_NUMA sched domain support Peter Zijlstra
2012-05-01 18:14   ` Peter Zijlstra
2012-05-01 18:14   ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120501182610.803671590@chello.nl \
    --to=a.p.zijlstra@chello.nl \
    --cc=efault@gmx.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=pjt@google.com \
    --cc=suresh.b.siddha@intel.com \
    --cc=vatsa@in.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.