[PATCH v2 5/5] sched/fair: Remove double_lock_balance() from load_balance()

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Kirill Tkhai <tkhai@yandex.ru>
To: linux-kernel@vger.kernel.org
Cc: nicolas.pitre@linaro.org, peterz@infradead.org, pjt@google.com,
	oleg@redhat.com, rostedt@goodmis.org, umgwanakikbuti@gmail.com,
	ktkhai@parallels.com, tim.c.chen@linux.intel.com,
	mingo@kernel.org
Subject: [PATCH v2 5/5] sched/fair: Remove double_lock_balance() from load_balance()
Date: Sat, 26 Jul 2014 18:59:52 +0400	[thread overview]
Message-ID: <20140726145949.6308.12411.stgit@localhost> (raw)
In-Reply-To: <20140726145508.6308.69121.stgit@localhost>

Keep on_rq = ONRQ_MIGRATING, while task is migrating, instead.

v2: Added missed check_preempt_curr() in attach_tasks().

Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
---
 kernel/sched/fair.c |   85 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 55 insertions(+), 30 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a1b74f2..a47fb3f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4706,9 +4706,9 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 		return;
 
 	/*
-	 * This is possible from callers such as move_task(), in which we
-	 * unconditionally check_prempt_curr() after an enqueue (which may have
-	 * lead to a throttle).  This both saves work and prevents false
+	 * This is possible from callers, in which we unconditionally
+	 * check_prempt_curr() after an enqueue (which may have lead
+	 * to a throttle).  This both saves work and prevents false
 	 * next-buddy nomination below.
 	 */
 	if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
@@ -5114,20 +5114,22 @@ struct lb_env {
 	unsigned int		loop_max;
 
 	enum fbq_type		fbq_type;
+	struct list_head	tasks;
 };
 
 /*
- * move_task - move a task from one runqueue to another runqueue.
- * Both runqueues must be locked.
+ * detach_task - detach a task from its runqueue for migration.
+ * The runqueue must be locked.
  */
-static void move_task(struct task_struct *p, struct lb_env *env)
+static void detach_task(struct task_struct *p, struct lb_env *env)
 {
 	deactivate_task(env->src_rq, p, 0);
+	list_add(&p->se.group_node, &env->tasks);
+	p->on_rq = ONRQ_MIGRATING;
 	set_task_cpu(p, env->dst_cpu);
-	activate_task(env->dst_rq, p, 0);
-	check_preempt_curr(env->dst_rq, p, 0);
 }
 
+
 /*
  * Is this task likely cache-hot?
  *
@@ -5362,9 +5364,9 @@ static struct task_struct *detach_one_task(struct lb_env *env)
 
 		/*
 		 * Right now, this is only the second place where
-		 * lb_gained[env->idle] is updated (other is move_tasks)
+		 * lb_gained[env->idle] is updated (other is detach_tasks)
 		 * so we can safely collect stats here rather than
-		 * inside move_tasks().
+		 * inside detach_tasks().
 		 */
 		schedstat_inc(env->sd, lb_gained[env->idle]);
 		return p;
@@ -5375,18 +5377,18 @@ static struct task_struct *detach_one_task(struct lb_env *env)
 static const unsigned int sched_nr_migrate_break = 32;
 
 /*
- * move_tasks tries to move up to imbalance weighted load from busiest to
- * this_rq, as part of a balancing operation within domain "sd".
- * Returns 1 if successful and 0 otherwise.
+ * detach_tasks tries to detach up to imbalance weighted load from busiest_rq,
+ * as part of a balancing operation within domain "sd".
+ * Returns number of detached tasks if successful and 0 otherwise.
  *
- * Called with both runqueues locked.
+ * Called with env->src_rq locked.
  */
-static int move_tasks(struct lb_env *env)
+static int detach_tasks(struct lb_env *env)
 {
 	struct list_head *tasks = &env->src_rq->cfs_tasks;
 	struct task_struct *p;
 	unsigned long load;
-	int pulled = 0;
+	int detached = 0;
 
 	if (env->imbalance <= 0)
 		return 0;
@@ -5417,14 +5419,15 @@ static int move_tasks(struct lb_env *env)
 		if ((load / 2) > env->imbalance)
 			goto next;
 
-		move_task(p, env);
-		pulled++;
+		detach_task(p, env);
+
+		detached++;
 		env->imbalance -= load;
 
 #ifdef CONFIG_PREEMPT
 		/*
 		 * NEWIDLE balancing is a source of latency, so preemptible
-		 * kernels will stop after the first task is pulled to minimize
+		 * kernels will stop after the first task is detached to minimize
 		 * the critical section.
 		 */
 		if (env->idle == CPU_NEWLY_IDLE)
@@ -5444,13 +5447,28 @@ static int move_tasks(struct lb_env *env)
 	}
 
 	/*
-	 * Right now, this is one of only two places move_task() is called,
-	 * so we can safely collect move_task() stats here rather than
-	 * inside move_task().
+	 * Right now, this is one of only two places we collect this stat
+	 * so we can safely collect detach_one_task() stats here rather
+	 * than inside detach_one_task().
 	 */
-	schedstat_add(env->sd, lb_gained[env->idle], pulled);
+	schedstat_add(env->sd, lb_gained[env->idle], detached);
+
+	return detached;
+}
+
+static void attach_tasks(struct lb_env *env)
+{
+	struct list_head *tasks = &env->tasks;
+	struct task_struct *p;
 
-	return pulled;
+	while (!list_empty(tasks)) {
+		p = list_first_entry(tasks, struct task_struct, se.group_node);
+		BUG_ON(task_rq(p) != env->dst_rq);
+		list_del_init(&p->se.group_node);
+		p->on_rq = ONRQ_QUEUED;
+		activate_task(env->dst_rq, p, 0);
+		check_preempt_curr(env->dst_rq, p, 0);
+	}
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -6559,6 +6577,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		.loop_break	= sched_nr_migrate_break,
 		.cpus		= cpus,
 		.fbq_type	= all,
+		.tasks		= LIST_HEAD_INIT(env.tasks),
 	};
 
 	/*
@@ -6608,16 +6627,22 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
 more_balance:
-		local_irq_save(flags);
-		double_rq_lock(env.dst_rq, busiest);
+		raw_spin_lock_irqsave(&busiest->lock, flags);
 
 		/*
 		 * cur_ld_moved - load moved in current iteration
 		 * ld_moved     - cumulative load moved across iterations
 		 */
-		cur_ld_moved = move_tasks(&env);
-		ld_moved += cur_ld_moved;
-		double_rq_unlock(env.dst_rq, busiest);
+		cur_ld_moved = detach_tasks(&env);
+		raw_spin_unlock(&busiest->lock);
+
+		if (cur_ld_moved) {
+			raw_spin_lock(&env.dst_rq->lock);
+			attach_tasks(&env);
+			raw_spin_unlock(&env.dst_rq->lock);
+			ld_moved += cur_ld_moved;
+		}
+
 		local_irq_restore(flags);
 
 		/*
@@ -6753,7 +6778,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		 * If we've begun active balancing, start to back off. This
 		 * case may not be covered by the all_pinned logic if there
 		 * is only 1 task on the busy runqueue (because we don't call
-		 * move_tasks).
+		 * detach_tasks).
 		 */
 		if (sd->balance_interval < sd->max_interval)
 			sd->balance_interval *= 2;

next prev parent reply	other threads:[~2014-07-26 15:00 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-26 14:58 [PATCH v2 0/5] sched: Add on_rq states and remove several double rq locks Kirill Tkhai
2014-07-26 14:59 ` [PATCH v2 1/5] sched: Wrapper for checking task_struct's .on_rq Kirill Tkhai
2014-07-26 14:59 ` [PATCH v2 2/5] sched: Teach scheduler to understand ONRQ_MIGRATING state Kirill Tkhai
2014-07-28  8:01   ` Peter Zijlstra
2014-07-28  9:05     ` Kirill Tkhai
2014-07-29  9:53       ` Kirill Tkhai
2014-07-29 12:38         ` Peter Zijlstra
2014-07-29 16:19         ` Oleg Nesterov
2014-07-30  8:04           ` Kirill Tkhai
2014-07-30 14:41             ` Oleg Nesterov
2014-07-30 21:25               ` Kirill Tkhai
2014-07-26 14:59 ` [PATCH v2 3/5] sched: Remove double_rq_lock() from __migrate_task() Kirill Tkhai
2014-07-26 14:59 ` [PATCH v2 4/5] sched/fair: Remove double_lock_balance() from active_load_balance_cpu_stop() Kirill Tkhai
2014-07-26 14:59 ` Kirill Tkhai [this message]
2014-07-29 12:57   ` [PATCH v2 5/5] sched/fair: Remove double_lock_balance() from load_balance() Peter Zijlstra
2014-07-26 19:39 ` [PATCH v2 0/5] sched: Add on_rq states and remove several double rq locks Oleg Nesterov
2014-07-27 21:26   ` Kirill Tkhai
2014-07-28 13:19     ` Oleg Nesterov

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:a1b74f2 dfblob:a47fb3f )
 OR (
bs:"[PATCH v2 5/5] sched/fair: Remove double_lock_balance() from load_balance()" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140726145949.6308.12411.stgit@localhost \
    --to=tkhai@yandex.ru \
    --cc=ktkhai@parallels.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=nicolas.pitre@linaro.org \
    --cc=oleg@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=rostedt@goodmis.org \
    --cc=tim.c.chen@linux.intel.com \
    --cc=umgwanakikbuti@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.