All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Michael Neuling <mikey@neuling.org>,
	Vincent Guittot <vincent.guittot@linaro.org>,
	Mike Galbraith <bitbucket@online.de>,
	linuxppc-dev@lists.ozlabs.org,
	linux-kernel <linux-kernel@vger.kernel.org>,
	Anton Blanchard <anton@samba.org>, Paul Turner <pjt@google.com>,
	Ingo Molnar <mingo@kernel.org>
Subject: Re: [PATCH 1/3] sched: Fix nohz_kick_needed to consider the nr_busy of the parent domain's group
Date: Mon, 28 Oct 2013 14:50:43 +0100	[thread overview]
Message-ID: <20131028135043.GP19466@laptop.lan> (raw)
In-Reply-To: <5268D54A.9060604@linux.vnet.ibm.com>

On Thu, Oct 24, 2013 at 01:37:38PM +0530, Preeti U Murthy wrote:
>  kernel/sched/core.c  |    5 +++++
>  kernel/sched/fair.c  |   38 ++++++++++++++++++++------------------
>  kernel/sched/sched.h |    1 +
>  3 files changed, 26 insertions(+), 18 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index c06b8d3..c540392 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -5271,6 +5271,7 @@ DEFINE_PER_CPU(struct sched_domain *, sd_llc);
>  DEFINE_PER_CPU(int, sd_llc_size);
>  DEFINE_PER_CPU(int, sd_llc_id);
>  DEFINE_PER_CPU(struct sched_domain *, sd_numa);
> +DEFINE_PER_CPU(struct sched_domain *, sd_busy);
>  
>  static void update_top_cache_domain(int cpu)
>  {
> @@ -5290,6 +5291,10 @@ static void update_top_cache_domain(int cpu)
>  
>  	sd = lowest_flag_domain(cpu, SD_NUMA);
>  	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
> +
> +	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
> +	if (sd)
> +		rcu_assign_pointer(per_cpu(sd_busy, cpu), sd->parent);
>  }
>  
>  /*
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index e9c9549..f66cfd9 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6515,16 +6515,16 @@ static inline void nohz_balance_exit_idle(int cpu)
>  static inline void set_cpu_sd_state_busy(void)
>  {
>  	struct sched_domain *sd;
> +	int cpu = smp_processor_id();
>  
>  	rcu_read_lock();
> +	sd = rcu_dereference(per_cpu(sd_busy, cpu));
>  
>  	if (!sd || !sd->nohz_idle)
>  		goto unlock;
>  	sd->nohz_idle = 0;
>  
> +	atomic_inc(&sd->groups->sgp->nr_busy_cpus);
>  unlock:
>  	rcu_read_unlock();
>  }
> @@ -6532,16 +6532,16 @@ unlock:
>  void set_cpu_sd_state_idle(void)
>  {
>  	struct sched_domain *sd;
> +	int cpu = smp_processor_id();
>  
>  	rcu_read_lock();
> +	sd = rcu_dereference(per_cpu(sd_busy, cpu));
>  
>  	if (!sd || sd->nohz_idle)
>  		goto unlock;
>  	sd->nohz_idle = 1;
>  
> +	atomic_dec(&sd->groups->sgp->nr_busy_cpus);
>  unlock:
>  	rcu_read_unlock();
>  }

Oh nice, that gets rid of the multiple atomics, and it nicely splits
this nohz logic into per topology groups -- now if only we could split
the rest too :-)

> @@ -6748,6 +6748,8 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
>  {
>  	unsigned long now = jiffies;
>  	struct sched_domain *sd;
> +	struct sched_group_power *sgp;
> +	int nr_busy;
>  
>  	if (unlikely(idle_cpu(cpu)))
>  		return 0;
> @@ -6773,22 +6775,22 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
>  		goto need_kick;
>  
>  	rcu_read_lock();
> +	sd = rcu_dereference(per_cpu(sd_busy, cpu));
>  
> +	if (sd) {
> +		sgp = sd->groups->sgp;
> +		nr_busy = atomic_read(&sgp->nr_busy_cpus);
>  
> +		if (nr_busy > 1)
>  			goto need_kick_unlock;
>  	}

OK, so far so good.

> +
> +	sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
> +
> +	if (sd && (cpumask_first_and(nohz.idle_cpus_mask,
> +				  sched_domain_span(sd)) < cpu))
> +		goto need_kick_unlock;
> +
>  	rcu_read_unlock();
>  	return 0;

This again is a bit sad: most archs will not have SD_ASYM_PACKING set at
all, which means they will all do a complete (and pointless) sched
domain tree walk here.

It would be much better to also introduce sd_asym and do the analogous
thing to the new sd_busy.

WARNING: multiple messages have this Message-ID (diff)
From: Peter Zijlstra <peterz@infradead.org>
To: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>,
	Ingo Molnar <mingo@kernel.org>,
	Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>,
	Mike Galbraith <bitbucket@online.de>,
	Paul Turner <pjt@google.com>, Michael Neuling <mikey@neuling.org>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	linux-kernel <linux-kernel@vger.kernel.org>,
	Anton Blanchard <anton@samba.org>,
	linuxppc-dev@lists.ozlabs.org
Subject: Re: [PATCH 1/3] sched: Fix nohz_kick_needed to consider the nr_busy of the parent domain's group
Date: Mon, 28 Oct 2013 14:50:43 +0100	[thread overview]
Message-ID: <20131028135043.GP19466@laptop.lan> (raw)
In-Reply-To: <5268D54A.9060604@linux.vnet.ibm.com>

On Thu, Oct 24, 2013 at 01:37:38PM +0530, Preeti U Murthy wrote:
>  kernel/sched/core.c  |    5 +++++
>  kernel/sched/fair.c  |   38 ++++++++++++++++++++------------------
>  kernel/sched/sched.h |    1 +
>  3 files changed, 26 insertions(+), 18 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index c06b8d3..c540392 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -5271,6 +5271,7 @@ DEFINE_PER_CPU(struct sched_domain *, sd_llc);
>  DEFINE_PER_CPU(int, sd_llc_size);
>  DEFINE_PER_CPU(int, sd_llc_id);
>  DEFINE_PER_CPU(struct sched_domain *, sd_numa);
> +DEFINE_PER_CPU(struct sched_domain *, sd_busy);
>  
>  static void update_top_cache_domain(int cpu)
>  {
> @@ -5290,6 +5291,10 @@ static void update_top_cache_domain(int cpu)
>  
>  	sd = lowest_flag_domain(cpu, SD_NUMA);
>  	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
> +
> +	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
> +	if (sd)
> +		rcu_assign_pointer(per_cpu(sd_busy, cpu), sd->parent);
>  }
>  
>  /*
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index e9c9549..f66cfd9 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6515,16 +6515,16 @@ static inline void nohz_balance_exit_idle(int cpu)
>  static inline void set_cpu_sd_state_busy(void)
>  {
>  	struct sched_domain *sd;
> +	int cpu = smp_processor_id();
>  
>  	rcu_read_lock();
> +	sd = rcu_dereference(per_cpu(sd_busy, cpu));
>  
>  	if (!sd || !sd->nohz_idle)
>  		goto unlock;
>  	sd->nohz_idle = 0;
>  
> +	atomic_inc(&sd->groups->sgp->nr_busy_cpus);
>  unlock:
>  	rcu_read_unlock();
>  }
> @@ -6532,16 +6532,16 @@ unlock:
>  void set_cpu_sd_state_idle(void)
>  {
>  	struct sched_domain *sd;
> +	int cpu = smp_processor_id();
>  
>  	rcu_read_lock();
> +	sd = rcu_dereference(per_cpu(sd_busy, cpu));
>  
>  	if (!sd || sd->nohz_idle)
>  		goto unlock;
>  	sd->nohz_idle = 1;
>  
> +	atomic_dec(&sd->groups->sgp->nr_busy_cpus);
>  unlock:
>  	rcu_read_unlock();
>  }

Oh nice, that gets rid of the multiple atomics, and it nicely splits
this nohz logic into per topology groups -- now if only we could split
the rest too :-)

> @@ -6748,6 +6748,8 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
>  {
>  	unsigned long now = jiffies;
>  	struct sched_domain *sd;
> +	struct sched_group_power *sgp;
> +	int nr_busy;
>  
>  	if (unlikely(idle_cpu(cpu)))
>  		return 0;
> @@ -6773,22 +6775,22 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
>  		goto need_kick;
>  
>  	rcu_read_lock();
> +	sd = rcu_dereference(per_cpu(sd_busy, cpu));
>  
> +	if (sd) {
> +		sgp = sd->groups->sgp;
> +		nr_busy = atomic_read(&sgp->nr_busy_cpus);
>  
> +		if (nr_busy > 1)
>  			goto need_kick_unlock;
>  	}

OK, so far so good.

> +
> +	sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
> +
> +	if (sd && (cpumask_first_and(nohz.idle_cpus_mask,
> +				  sched_domain_span(sd)) < cpu))
> +		goto need_kick_unlock;
> +
>  	rcu_read_unlock();
>  	return 0;

This again is a bit sad: most archs will not have SD_ASYM_PACKING set at
all, which means they will all do a complete (and pointless) sched
domain tree walk here.

It would be much better to also introduce sd_asym and do the analogous
thing to the new sd_busy.


  reply	other threads:[~2013-10-28 13:50 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-10-21 11:44 [PATCH 0/3] sched: Fixes for task placement in SMT threads Vaidyanathan Srinivasan
2013-10-21 11:44 ` Vaidyanathan Srinivasan
2013-10-21 11:44 ` [PATCH 1/3] sched: Fix nohz_kick_needed to consider the nr_busy of the parent domain's group Vaidyanathan Srinivasan
2013-10-21 11:44   ` Vaidyanathan Srinivasan
2013-10-22 14:35   ` Kamalesh Babulal
2013-10-22 14:35     ` Kamalesh Babulal
2013-10-22 16:40     ` Preeti U Murthy
2013-10-22 16:40       ` Preeti U Murthy
2013-10-22 22:11   ` Peter Zijlstra
2013-10-22 22:11     ` Peter Zijlstra
2013-10-23  4:00     ` Preeti U Murthy
2013-10-23  4:00       ` Preeti U Murthy
2013-10-23  4:21       ` Preeti U Murthy
2013-10-23  9:50     ` Preeti U Murthy
2013-10-23  9:50       ` Preeti U Murthy
2013-10-23 15:28       ` Vincent Guittot
2013-10-23 15:28         ` Vincent Guittot
2013-10-24  8:07         ` Preeti U Murthy
2013-10-24  8:07           ` Preeti U Murthy
2013-10-28 13:50           ` Peter Zijlstra [this message]
2013-10-28 13:50             ` Peter Zijlstra
2013-10-29  3:30             ` Preeti U Murthy
2013-10-29  3:30               ` Preeti U Murthy
2013-10-29 13:26               ` Peter Zijlstra
2013-10-29 13:26                 ` Peter Zijlstra
2013-10-21 11:44 ` [PATCH 2/3] sched: Fix asymmetric scheduling for POWER7 Vaidyanathan Srinivasan
2013-10-21 11:44   ` Vaidyanathan Srinivasan
2013-10-21 22:55   ` Michael Neuling
2013-10-21 22:55     ` Michael Neuling
2013-10-22 22:18   ` Peter Zijlstra
2013-10-22 22:18     ` Peter Zijlstra
2013-10-21 11:45 ` [PATCH 3/3] sched: Aggressive balance in domains whose groups share package resources Vaidyanathan Srinivasan
2013-10-21 11:45   ` Vaidyanathan Srinivasan
2013-10-22 22:23   ` Peter Zijlstra
2013-10-22 22:23     ` Peter Zijlstra
2013-10-24  4:04     ` Preeti U Murthy
2013-10-24  4:04       ` Preeti U Murthy
2013-10-25 13:19     ` Preeti U Murthy
2013-10-25 13:19       ` Preeti U Murthy
2013-10-28 15:53   ` Peter Zijlstra
2013-10-28 15:53     ` Peter Zijlstra
2013-10-29  5:35     ` Preeti U Murthy
2013-10-29  5:35       ` Preeti U Murthy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20131028135043.GP19466@laptop.lan \
    --to=peterz@infradead.org \
    --cc=anton@samba.org \
    --cc=bitbucket@online.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mikey@neuling.org \
    --cc=mingo@kernel.org \
    --cc=pjt@google.com \
    --cc=preeti@linux.vnet.ibm.com \
    --cc=vincent.guittot@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.