All of lore.kernel.org
 help / color / mirror / Atom feed
From: Preeti U Murthy <preeti@linux.vnet.ibm.com>
To: Vincent Guittot <vincent.guittot@linaro.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@kernel.org>
Cc: Michael Neuling <mikey@neuling.org>,
	Mike Galbraith <bitbucket@online.de>,
	linuxppc-dev@lists.ozlabs.org,
	linux-kernel <linux-kernel@vger.kernel.org>,
	Anton Blanchard <anton@samba.org>, Paul Turner <pjt@google.com>
Subject: Re: [PATCH 1/3] sched: Fix nohz_kick_needed to consider the nr_busy of the parent domain's group
Date: Thu, 24 Oct 2013 13:37:38 +0530	[thread overview]
Message-ID: <5268D54A.9060604@linux.vnet.ibm.com> (raw)
In-Reply-To: <CAKfTPtAUFdmc7PasebNksYztMwRrWsGWPgN6JJJurddu+UP-hQ@mail.gmail.com>

Hi Vincent,

I have addressed your comments and below is the fresh patch. This patch
applies on PATCH 2/3 posted in this thread.

Regards
Preeti U Murthy


sched:Remove un-necessary iterations over sched domains to update/query nr_busy_cpus

From: Preeti U Murthy <preeti@linux.vnet.ibm.com>

nr_busy_cpus parameter is used by nohz_kick_needed() to find out the number
of busy cpus in a sched domain which has SD_SHARE_PKG_RESOURCES flag set.
Therefore instead of updating nr_busy_cpus at every level of sched domain,
since it is irrelevant, we can update this parameter only at the parent
domain of the sd which has this flag set. Introduce a per-cpu parameter
sd_busy which represents this parent domain.

In nohz_kick_needed() we directly query the nr_busy_cpus parameter
associated with the groups of sd_busy.

By associating sd_busy with the highest domain which has
SD_SHARE_PKG_RESOURCES flag set, we cover all lower level domains which could
have this flag set and trigger nohz_idle_balancing if any of the levels have
more than one busy cpu.

sd_busy is irrelevant for asymmetric load balancing.

While we are at it, we might as well change the nohz_idle parameter to be
updated at the sd_busy domain level alone and not the base domain level of a CPU.
This will unify the concept of busy cpus at just one level of sched domain
where it is currently used.

Signed-off-by: Preeti U Murthy<preeti@linux.vnet.ibm.com>
---
 kernel/sched/core.c  |    5 +++++
 kernel/sched/fair.c  |   38 ++++++++++++++++++++------------------
 kernel/sched/sched.h |    1 +
 3 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c06b8d3..c540392 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5271,6 +5271,7 @@ DEFINE_PER_CPU(struct sched_domain *, sd_llc);
 DEFINE_PER_CPU(int, sd_llc_size);
 DEFINE_PER_CPU(int, sd_llc_id);
 DEFINE_PER_CPU(struct sched_domain *, sd_numa);
+DEFINE_PER_CPU(struct sched_domain *, sd_busy);
 
 static void update_top_cache_domain(int cpu)
 {
@@ -5290,6 +5291,10 @@ static void update_top_cache_domain(int cpu)
 
 	sd = lowest_flag_domain(cpu, SD_NUMA);
 	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
+
+	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
+	if (sd)
+		rcu_assign_pointer(per_cpu(sd_busy, cpu), sd->parent);
 }
 
 /*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e9c9549..f66cfd9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6515,16 +6515,16 @@ static inline void nohz_balance_exit_idle(int cpu)
 static inline void set_cpu_sd_state_busy(void)
 {
 	struct sched_domain *sd;
+	int cpu = smp_processor_id();
 
 	rcu_read_lock();
-	sd = rcu_dereference_check_sched_domain(this_rq()->sd);
+	sd = rcu_dereference(per_cpu(sd_busy, cpu));
 
 	if (!sd || !sd->nohz_idle)
 		goto unlock;
 	sd->nohz_idle = 0;
 
-	for (; sd; sd = sd->parent)
-		atomic_inc(&sd->groups->sgp->nr_busy_cpus);
+	atomic_inc(&sd->groups->sgp->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
@@ -6532,16 +6532,16 @@ unlock:
 void set_cpu_sd_state_idle(void)
 {
 	struct sched_domain *sd;
+	int cpu = smp_processor_id();
 
 	rcu_read_lock();
-	sd = rcu_dereference_check_sched_domain(this_rq()->sd);
+	sd = rcu_dereference(per_cpu(sd_busy, cpu));
 
 	if (!sd || sd->nohz_idle)
 		goto unlock;
 	sd->nohz_idle = 1;
 
-	for (; sd; sd = sd->parent)
-		atomic_dec(&sd->groups->sgp->nr_busy_cpus);
+	atomic_dec(&sd->groups->sgp->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
@@ -6748,6 +6748,8 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 {
 	unsigned long now = jiffies;
 	struct sched_domain *sd;
+	struct sched_group_power *sgp;
+	int nr_busy;
 
 	if (unlikely(idle_cpu(cpu)))
 		return 0;
@@ -6773,22 +6775,22 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 		goto need_kick;
 
 	rcu_read_lock();
-	for_each_domain(cpu, sd) {
-		struct sched_group *sg = sd->groups;
-		struct sched_group_power *sgp = sg->sgp;
-		int nr_busy = atomic_read(&sgp->nr_busy_cpus);
+	sd = rcu_dereference(per_cpu(sd_busy, cpu));
 
-		if (sd->flags & SD_SHARE_PKG_RESOURCES && nr_busy > 1)
-			goto need_kick_unlock;
+	if (sd) {
+		sgp = sd->groups->sgp;
+		nr_busy = atomic_read(&sgp->nr_busy_cpus);
 
-		if (sd->flags & SD_ASYM_PACKING
-		    && (cpumask_first_and(nohz.idle_cpus_mask,
-					  sched_domain_span(sd)) < cpu))
+		if (nr_busy > 1)
 			goto need_kick_unlock;
-
-		if (!(sd->flags & (SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING)))
-			break;
 	}
+
+	sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
+
+	if (sd && (cpumask_first_and(nohz.idle_cpus_mask,
+				  sched_domain_span(sd)) < cpu))
+		goto need_kick_unlock;
+
 	rcu_read_unlock();
 	return 0;
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ffc7087..0f1253f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -623,6 +623,7 @@ DECLARE_PER_CPU(struct sched_domain *, sd_llc);
 DECLARE_PER_CPU(int, sd_llc_size);
 DECLARE_PER_CPU(int, sd_llc_id);
 DECLARE_PER_CPU(struct sched_domain *, sd_numa);
+DECLARE_PER_CPU(struct sched_domain *, sd_busy);
 
 struct sched_group_power {
 	atomic_t ref;

WARNING: multiple messages have this Message-ID (diff)
From: Preeti U Murthy <preeti@linux.vnet.ibm.com>
To: Vincent Guittot <vincent.guittot@linaro.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@kernel.org>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>,
	Mike Galbraith <bitbucket@online.de>,
	Paul Turner <pjt@google.com>, Michael Neuling <mikey@neuling.org>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	linux-kernel <linux-kernel@vger.kernel.org>,
	Anton Blanchard <anton@samba.org>,
	linuxppc-dev@lists.ozlabs.org
Subject: Re: [PATCH 1/3] sched: Fix nohz_kick_needed to consider the nr_busy of the parent domain's group
Date: Thu, 24 Oct 2013 13:37:38 +0530	[thread overview]
Message-ID: <5268D54A.9060604@linux.vnet.ibm.com> (raw)
In-Reply-To: <CAKfTPtAUFdmc7PasebNksYztMwRrWsGWPgN6JJJurddu+UP-hQ@mail.gmail.com>

Hi Vincent,

I have addressed your comments and below is the fresh patch. This patch
applies on PATCH 2/3 posted in this thread.

Regards
Preeti U Murthy


sched:Remove un-necessary iterations over sched domains to update/query nr_busy_cpus

From: Preeti U Murthy <preeti@linux.vnet.ibm.com>

nr_busy_cpus parameter is used by nohz_kick_needed() to find out the number
of busy cpus in a sched domain which has SD_SHARE_PKG_RESOURCES flag set.
Therefore instead of updating nr_busy_cpus at every level of sched domain,
since it is irrelevant, we can update this parameter only at the parent
domain of the sd which has this flag set. Introduce a per-cpu parameter
sd_busy which represents this parent domain.

In nohz_kick_needed() we directly query the nr_busy_cpus parameter
associated with the groups of sd_busy.

By associating sd_busy with the highest domain which has
SD_SHARE_PKG_RESOURCES flag set, we cover all lower level domains which could
have this flag set and trigger nohz_idle_balancing if any of the levels have
more than one busy cpu.

sd_busy is irrelevant for asymmetric load balancing.

While we are at it, we might as well change the nohz_idle parameter to be
updated at the sd_busy domain level alone and not the base domain level of a CPU.
This will unify the concept of busy cpus at just one level of sched domain
where it is currently used.

Signed-off-by: Preeti U Murthy<preeti@linux.vnet.ibm.com>
---
 kernel/sched/core.c  |    5 +++++
 kernel/sched/fair.c  |   38 ++++++++++++++++++++------------------
 kernel/sched/sched.h |    1 +
 3 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c06b8d3..c540392 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5271,6 +5271,7 @@ DEFINE_PER_CPU(struct sched_domain *, sd_llc);
 DEFINE_PER_CPU(int, sd_llc_size);
 DEFINE_PER_CPU(int, sd_llc_id);
 DEFINE_PER_CPU(struct sched_domain *, sd_numa);
+DEFINE_PER_CPU(struct sched_domain *, sd_busy);
 
 static void update_top_cache_domain(int cpu)
 {
@@ -5290,6 +5291,10 @@ static void update_top_cache_domain(int cpu)
 
 	sd = lowest_flag_domain(cpu, SD_NUMA);
 	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
+
+	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
+	if (sd)
+		rcu_assign_pointer(per_cpu(sd_busy, cpu), sd->parent);
 }
 
 /*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e9c9549..f66cfd9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6515,16 +6515,16 @@ static inline void nohz_balance_exit_idle(int cpu)
 static inline void set_cpu_sd_state_busy(void)
 {
 	struct sched_domain *sd;
+	int cpu = smp_processor_id();
 
 	rcu_read_lock();
-	sd = rcu_dereference_check_sched_domain(this_rq()->sd);
+	sd = rcu_dereference(per_cpu(sd_busy, cpu));
 
 	if (!sd || !sd->nohz_idle)
 		goto unlock;
 	sd->nohz_idle = 0;
 
-	for (; sd; sd = sd->parent)
-		atomic_inc(&sd->groups->sgp->nr_busy_cpus);
+	atomic_inc(&sd->groups->sgp->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
@@ -6532,16 +6532,16 @@ unlock:
 void set_cpu_sd_state_idle(void)
 {
 	struct sched_domain *sd;
+	int cpu = smp_processor_id();
 
 	rcu_read_lock();
-	sd = rcu_dereference_check_sched_domain(this_rq()->sd);
+	sd = rcu_dereference(per_cpu(sd_busy, cpu));
 
 	if (!sd || sd->nohz_idle)
 		goto unlock;
 	sd->nohz_idle = 1;
 
-	for (; sd; sd = sd->parent)
-		atomic_dec(&sd->groups->sgp->nr_busy_cpus);
+	atomic_dec(&sd->groups->sgp->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
@@ -6748,6 +6748,8 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 {
 	unsigned long now = jiffies;
 	struct sched_domain *sd;
+	struct sched_group_power *sgp;
+	int nr_busy;
 
 	if (unlikely(idle_cpu(cpu)))
 		return 0;
@@ -6773,22 +6775,22 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
 		goto need_kick;
 
 	rcu_read_lock();
-	for_each_domain(cpu, sd) {
-		struct sched_group *sg = sd->groups;
-		struct sched_group_power *sgp = sg->sgp;
-		int nr_busy = atomic_read(&sgp->nr_busy_cpus);
+	sd = rcu_dereference(per_cpu(sd_busy, cpu));
 
-		if (sd->flags & SD_SHARE_PKG_RESOURCES && nr_busy > 1)
-			goto need_kick_unlock;
+	if (sd) {
+		sgp = sd->groups->sgp;
+		nr_busy = atomic_read(&sgp->nr_busy_cpus);
 
-		if (sd->flags & SD_ASYM_PACKING
-		    && (cpumask_first_and(nohz.idle_cpus_mask,
-					  sched_domain_span(sd)) < cpu))
+		if (nr_busy > 1)
 			goto need_kick_unlock;
-
-		if (!(sd->flags & (SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING)))
-			break;
 	}
+
+	sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
+
+	if (sd && (cpumask_first_and(nohz.idle_cpus_mask,
+				  sched_domain_span(sd)) < cpu))
+		goto need_kick_unlock;
+
 	rcu_read_unlock();
 	return 0;
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ffc7087..0f1253f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -623,6 +623,7 @@ DECLARE_PER_CPU(struct sched_domain *, sd_llc);
 DECLARE_PER_CPU(int, sd_llc_size);
 DECLARE_PER_CPU(int, sd_llc_id);
 DECLARE_PER_CPU(struct sched_domain *, sd_numa);
+DECLARE_PER_CPU(struct sched_domain *, sd_busy);
 
 struct sched_group_power {
 	atomic_t ref;


  reply	other threads:[~2013-10-24  8:10 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-10-21 11:44 [PATCH 0/3] sched: Fixes for task placement in SMT threads Vaidyanathan Srinivasan
2013-10-21 11:44 ` Vaidyanathan Srinivasan
2013-10-21 11:44 ` [PATCH 1/3] sched: Fix nohz_kick_needed to consider the nr_busy of the parent domain's group Vaidyanathan Srinivasan
2013-10-21 11:44   ` Vaidyanathan Srinivasan
2013-10-22 14:35   ` Kamalesh Babulal
2013-10-22 14:35     ` Kamalesh Babulal
2013-10-22 16:40     ` Preeti U Murthy
2013-10-22 16:40       ` Preeti U Murthy
2013-10-22 22:11   ` Peter Zijlstra
2013-10-22 22:11     ` Peter Zijlstra
2013-10-23  4:00     ` Preeti U Murthy
2013-10-23  4:00       ` Preeti U Murthy
2013-10-23  4:21       ` Preeti U Murthy
2013-10-23  9:50     ` Preeti U Murthy
2013-10-23  9:50       ` Preeti U Murthy
2013-10-23 15:28       ` Vincent Guittot
2013-10-23 15:28         ` Vincent Guittot
2013-10-24  8:07         ` Preeti U Murthy [this message]
2013-10-24  8:07           ` Preeti U Murthy
2013-10-28 13:50           ` Peter Zijlstra
2013-10-28 13:50             ` Peter Zijlstra
2013-10-29  3:30             ` Preeti U Murthy
2013-10-29  3:30               ` Preeti U Murthy
2013-10-29 13:26               ` Peter Zijlstra
2013-10-29 13:26                 ` Peter Zijlstra
2013-10-21 11:44 ` [PATCH 2/3] sched: Fix asymmetric scheduling for POWER7 Vaidyanathan Srinivasan
2013-10-21 11:44   ` Vaidyanathan Srinivasan
2013-10-21 22:55   ` Michael Neuling
2013-10-21 22:55     ` Michael Neuling
2013-10-22 22:18   ` Peter Zijlstra
2013-10-22 22:18     ` Peter Zijlstra
2013-10-21 11:45 ` [PATCH 3/3] sched: Aggressive balance in domains whose groups share package resources Vaidyanathan Srinivasan
2013-10-21 11:45   ` Vaidyanathan Srinivasan
2013-10-22 22:23   ` Peter Zijlstra
2013-10-22 22:23     ` Peter Zijlstra
2013-10-24  4:04     ` Preeti U Murthy
2013-10-24  4:04       ` Preeti U Murthy
2013-10-25 13:19     ` Preeti U Murthy
2013-10-25 13:19       ` Preeti U Murthy
2013-10-28 15:53   ` Peter Zijlstra
2013-10-28 15:53     ` Peter Zijlstra
2013-10-29  5:35     ` Preeti U Murthy
2013-10-29  5:35       ` Preeti U Murthy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5268D54A.9060604@linux.vnet.ibm.com \
    --to=preeti@linux.vnet.ibm.com \
    --cc=anton@samba.org \
    --cc=bitbucket@online.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mikey@neuling.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.