Re: [PATCH] sched: Fast idling of CPU when system is partially loaded

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Jason Low <jason.low2@hp.com>
To: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>,
	Peter Zijlstra <peterz@infradead.org>,
	Andi Kleen <andi@firstfloor.org>,
	Michel Lespinasse <walken@google.com>,
	Rik van Riel <riel@redhat.com>,
	Peter Hurley <peter@hurleysoftware.com>,
	Davidlohr Bueson <davidlohr@hp.com>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH] sched: Fast idling of CPU when system is partially loaded
Date: Thu, 12 Jun 2014 23:01:51 -0700	[thread overview]
Message-ID: <1402639311.9617.84.camel@j-VirtualBox> (raw)
In-Reply-To: <1402608359.2970.548.camel@schen9-DESK>

On Thu, 2014-06-12 at 14:25 -0700, Tim Chen wrote:

> Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
> ---
>  kernel/sched/core.c  | 12 ++++++++----
>  kernel/sched/fair.c  | 23 +++++++++++++++++++++--
>  kernel/sched/sched.h | 10 ++++++++--
>  3 files changed, 37 insertions(+), 8 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index c6b9879..4f57221 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -2630,7 +2630,7 @@ static inline struct task_struct *
>  pick_next_task(struct rq *rq, struct task_struct *prev)
>  {
>  	const struct sched_class *class = &fair_sched_class;
> -	struct task_struct *p;
> +	struct task_struct *p = NULL;
>  
>  	/*
>  	 * Optimization: we know that if all tasks are in
> @@ -2638,9 +2638,13 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
>  	 */
>  	if (likely(prev->sched_class == class &&
>  		   rq->nr_running == rq->cfs.h_nr_running)) {
> -		p = fair_sched_class.pick_next_task(rq, prev);
> -		if (unlikely(p == RETRY_TASK))
> -			goto again;
> +
> +		/* If no cpu has more than 1 task, skip */
> +		if (rq->nr_running > 0 || rq->rd->overload) {

Hi Tim,

If the intent is to skip when no cpu has more than 1 task, should the
check above be (rq->nr_running > 1) instead of
(rq->nr_running > 0)?

> +			p = fair_sched_class.pick_next_task(rq, prev);
> +			if (unlikely(p == RETRY_TASK))
> +				goto again;
> +		}
>  
>  		/* assumes fair_sched_class->next == idle_sched_class */
>  		if (unlikely(!p))
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 9855e87..00ab38c 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5863,7 +5863,8 @@ static inline int sg_capacity(struct lb_env *env, struct sched_group *group)
>   */
>  static inline void update_sg_lb_stats(struct lb_env *env,
>  			struct sched_group *group, int load_idx,
> -			int local_group, struct sg_lb_stats *sgs)
> +			int local_group, struct sg_lb_stats *sgs,
> +			bool *overload)
>  {
>  	unsigned long load;
>  	int i;
> @@ -5881,6 +5882,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>  
>  		sgs->group_load += load;
>  		sgs->sum_nr_running += rq->nr_running;
> +		if (overload && rq->nr_running > 1)
> +			*overload = true;
>  #ifdef CONFIG_NUMA_BALANCING
>  		sgs->nr_numa_running += rq->nr_numa_running;
>  		sgs->nr_preferred_running += rq->nr_preferred_running;
> @@ -5991,6 +5994,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
>  	struct sched_group *sg = env->sd->groups;
>  	struct sg_lb_stats tmp_sgs;
>  	int load_idx, prefer_sibling = 0;
> +	bool overload = false;
>  
>  	if (child && child->flags & SD_PREFER_SIBLING)
>  		prefer_sibling = 1;
> @@ -6011,7 +6015,13 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
>  				update_group_power(env->sd, env->dst_cpu);
>  		}
>  
> -		update_sg_lb_stats(env, sg, load_idx, local_group, sgs);
> +		if (env->sd->parent)
> +			update_sg_lb_stats(env, sg, load_idx, local_group, sgs,
> +						NULL);
> +		else
> +			/* gather overload info if we are at root domain */
> +			update_sg_lb_stats(env, sg, load_idx, local_group, sgs,
> +						&overload);

Would it make the code cleaner if we always call:

+	update_sg_lb_stats(env, sg, load_idx, local_group, sgs,
				   &overload);

and in update_sg_lb_stats():

+	bool is_root_domain = (env->sd->parent == NULL);


+		/* gather overload info if we are at root domain */
+		if (is_root_domain && rq->nr_running > 1)
+			*overload = true;

>  		if (local_group)
>  			goto next_group;
> @@ -6045,6 +6055,15 @@ next_group:
>  
>  	if (env->sd->flags & SD_NUMA)
>  		env->fbq_type = fbq_classify_group(&sds->busiest_stat);
> +
> +	if (!env->sd->parent) {
> +		/* update overload indicator if we are at root domain */
> +		int i = cpumask_first(sched_domain_span(env->sd));
> +		struct rq *rq = cpu_rq(i);

Perhaps we could just use:

struct rq *rq = env->dst_rq;

> +		if (rq->rd->overload != overload)
> +			rq->rd->overload = overload;
> +	}
> +
>  }
>  
>  /**
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index e47679b..a0cd5c1 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -477,6 +477,9 @@ struct root_domain {
>  	cpumask_var_t span;
>  	cpumask_var_t online;
>  
> +	/* Indicate more than one runnable task for any CPU */
> +	bool overload;
> +
>  	/*
>  	 * The bit corresponding to a CPU gets set here if such CPU has more
>  	 * than one runnable -deadline task (as it is below for RT tasks).
> @@ -1212,15 +1215,18 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
>  
>  	rq->nr_running = prev_nr + count;
>  
> -#ifdef CONFIG_NO_HZ_FULL
>  	if (prev_nr < 2 && rq->nr_running >= 2) {
> +		if (!rq->rd->overload)
> +			rq->rd->overload = true;
> +
> +#ifdef CONFIG_NO_HZ_FULL
>  		if (tick_nohz_full_cpu(rq->cpu)) {
>  			/* Order rq->nr_running write against the IPI */
>  			smp_wmb();
>  			smp_send_reschedule(rq->cpu);
>  		}
> -       }
>  #endif
> +       }
>  }
>  
>  static inline void sub_nr_running(struct rq *rq, unsigned count)

next prev parent reply	other threads:[~2014-06-13  6:01 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-06-12 21:25 [PATCH] sched: Fast idling of CPU when system is partially loaded Tim Chen
2014-06-13  6:01 ` Jason Low [this message]
2014-06-13 16:28   ` Tim Chen
2014-06-13 19:18     ` Jason Low
2014-06-13 20:17       ` Tim Chen
2014-06-15 16:19 ` Peter Zijlstra
2014-06-16 15:50   ` Tim Chen
2014-06-17 12:09     ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1402639311.9617.84.camel@j-VirtualBox \
    --to=jason.low2@hp.com \
    --cc=andi@firstfloor.org \
    --cc=davidlohr@hp.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=peter@hurleysoftware.com \
    --cc=peterz@infradead.org \
    --cc=riel@redhat.com \
    --cc=tim.c.chen@linux.intel.com \
    --cc=walken@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.