All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mike Galbraith <efault@gmx.de>
To: Nikhil Rao <ncrao@google.com>
Cc: Ingo Molnar <mingo@elte.hu>,
	Peter Zijlstra <peterz@infradead.org>,
	Venkatesh Pallipadi <venki@google.com>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH 0/3][RFC] Improve load balancing when tasks have large weight differential
Date: Sun, 10 Oct 2010 12:15:34 +0200	[thread overview]
Message-ID: <1286705734.7478.22.camel@marge.simson.net> (raw)
In-Reply-To: <AANLkTim8m6r_8pFO6o0uOnki6mNoaR+8NZ4M0pO3H515@mail.gmail.com>

On Fri, 2010-10-08 at 13:34 -0700, Nikhil Rao wrote:

> I have attached a patch that tackles the problem in a different way.
> Instead of preventing the sched group from entering the bad state, it
> shortcuts the checks in f_b_g if the group has extra capacity, where
> extra capacity is defined as group_capacity > nr_running. The patch
> exposes a sched feature called PREFER_UTILIZATION (disabled by
> default). When this is enabled, f_b_g shortcuts the checks if the
> local group has capacity. This actually works quite well.

Yeah, it does seem to work well.

I don't like the sched feature much though; a domain flag seems more
appropriate.  I bent your patch up a bit to correct utilization woes
during NEWIDLE balancing instead... it still seems to work fine.

---
 kernel/sched_fair.c |   30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

Index: linux-2.6.36.git/kernel/sched_fair.c
===================================================================
--- linux-2.6.36.git.orig/kernel/sched_fair.c
+++ linux-2.6.36.git/kernel/sched_fair.c
@@ -1764,6 +1764,10 @@ static void pull_task(struct rq *src_rq,
 	set_task_cpu(p, this_cpu);
 	activate_task(this_rq, p, 0);
 	check_preempt_curr(this_rq, p, 0);
+
+	/* re-arm NEWIDLE balancing when moving tasks */
+	src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost;
+	this_rq->idle_stamp = 0;
 }
 
 /*
@@ -2030,12 +2034,14 @@ struct sd_lb_stats {
 	unsigned long this_load;
 	unsigned long this_load_per_task;
 	unsigned long this_nr_running;
+	unsigned long this_has_capacity;
 
 	/* Statistics of the busiest group */
 	unsigned long max_load;
 	unsigned long busiest_load_per_task;
 	unsigned long busiest_nr_running;
 	unsigned long busiest_group_capacity;
+	unsigned long busiest_has_capacity;
 
 	int group_imb; /* Is there imbalance in this sd */
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -2058,6 +2064,7 @@ struct sg_lb_stats {
 	unsigned long sum_weighted_load; /* Weighted load of group's tasks */
 	unsigned long group_capacity;
 	int group_imb; /* Is there an imbalance in the group ? */
+	int group_has_capacity; /* Is there extra capacity in the group? */
 };
 
 /**
@@ -2454,6 +2461,9 @@ static inline void update_sg_lb_stats(st
 		DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
 	if (!sgs->group_capacity)
 		sgs->group_capacity = fix_small_capacity(sd, group);
+
+	if (sgs->group_capacity > sgs->sum_nr_running)
+		sgs->group_has_capacity = 1;
 }
 
 /**
@@ -2552,12 +2562,14 @@ static inline void update_sd_lb_stats(st
 			sds->this = sg;
 			sds->this_nr_running = sgs.sum_nr_running;
 			sds->this_load_per_task = sgs.sum_weighted_load;
+			sds->this_has_capacity = sgs.group_has_capacity;
 		} else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {
 			sds->max_load = sgs.avg_load;
 			sds->busiest = sg;
 			sds->busiest_nr_running = sgs.sum_nr_running;
 			sds->busiest_group_capacity = sgs.group_capacity;
 			sds->busiest_load_per_task = sgs.sum_weighted_load;
+			sds->busiest_has_capacity = sgs.group_has_capacity;
 			sds->group_imb = sgs.group_imb;
 		}
 
@@ -2754,6 +2766,15 @@ static inline void calculate_imbalance(s
 		return fix_small_imbalance(sds, this_cpu, imbalance);
 
 }
+
+/*
+ * True when this group has spare capacity and the busiest group has none.
+ */
+static bool check_utilization(struct sd_lb_stats *sds)
+{
+	return sds->this_has_capacity && !sds->busiest_has_capacity;
+}
+
 /******* find_busiest_group() helpers end here *********************/
 
 /**
@@ -2816,6 +2837,10 @@ find_busiest_group(struct sched_domain *
 	if (!sds.busiest || sds.busiest_nr_running == 0)
 		goto out_balanced;
 
+	/*  SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
+	if (idle == CPU_NEWLY_IDLE && check_utilization(&sds))
+		goto force_balance;
+
 	if (sds.this_load >= sds.max_load)
 		goto out_balanced;
 
@@ -2827,6 +2852,7 @@ find_busiest_group(struct sched_domain *
 	if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load)
 		goto out_balanced;
 
+force_balance:
 	/* Looks like there is an imbalance. Compute it */
 	calculate_imbalance(&sds, this_cpu, imbalance);
 	return sds.busiest;
@@ -3153,10 +3179,8 @@ static void idle_balance(int this_cpu, s
 		interval = msecs_to_jiffies(sd->balance_interval);
 		if (time_after(next_balance, sd->last_balance + interval))
 			next_balance = sd->last_balance + interval;
-		if (pulled_task) {
-			this_rq->idle_stamp = 0;
+		if (pulled_task)
 			break;
-		}
 	}
 
 	raw_spin_lock(&this_rq->lock);



      reply	other threads:[~2010-10-10 10:15 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-09-28  0:29 [PATCH 0/3][RFC] Improve load balancing when tasks have large weight differential Nikhil Rao
2010-09-28  0:29 ` [PATCH 1/3] sched: set group_imb only a task can be pulled from the busiest cpu Nikhil Rao
2010-09-28  0:29 ` [PATCH 2/3] sched: drop group_capacity to 1 only if remote group has no running tasks Nikhil Rao
2010-09-28 23:04   ` Suresh Siddha
2010-10-11 21:20     ` Nikhil Rao
2010-09-28  0:29 ` [PATCH 3/3] sched: do not consider SCHED_IDLE tasks to be cache hot Nikhil Rao
2010-09-28 13:57 ` [PATCH 0/3][RFC] Improve load balancing when tasks have large weight differential Mike Galbraith
2010-09-28 21:15   ` Nikhil Rao
2010-09-29  1:45     ` Mike Galbraith
2010-09-29 19:32       ` Nikhil Rao
2010-10-04  3:08         ` Mike Galbraith
2010-10-06  8:23           ` Nikhil Rao
2010-10-08  7:22             ` Mike Galbraith
2010-10-08 20:34               ` Nikhil Rao
2010-10-10 10:15                 ` Mike Galbraith [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1286705734.7478.22.camel@marge.simson.net \
    --to=efault@gmx.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=ncrao@google.com \
    --cc=peterz@infradead.org \
    --cc=venki@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.