All of lore.kernel.org
 help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>,
	Gregory Haskins <ghaskins@novell.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Christoph Lameter <clameter@sgi.com>,
	Steven Rostedt <srostedt@redhat.com>
Subject: [PATCH v4 05/20] pull RT tasks
Date: Tue, 20 Nov 2007 20:00:59 -0500	[thread overview]
Message-ID: <20071121011249.662344517@goodmis.org> (raw)
In-Reply-To: 20071121010054.663842380@goodmis.org

[-- Attachment #1: rt-balance-pull-tasks.patch --]
[-- Type: text/plain, Size: 7868 bytes --]

This patch adds the algorithm to pull tasks from RT overloaded runqueues.

When a pull RT is initiated, all overloaded runqueues are examined for
a RT task that is higher in prio than the highest prio task queued on the
target runqueue. If another runqueue holds a RT task of higher prio than
the highest prio task on the target runqueue, that task is pulled to the
target runqueue.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---

 kernel/sched.c    |    2 
 kernel/sched_rt.c |  187 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 178 insertions(+), 11 deletions(-)

Index: linux-compile.git/kernel/sched.c
===================================================================
--- linux-compile.git.orig/kernel/sched.c	2007-11-20 19:52:56.000000000 -0500
+++ linux-compile.git/kernel/sched.c	2007-11-20 19:52:59.000000000 -0500
@@ -3646,6 +3646,8 @@ need_resched_nonpreemptible:
 		switch_count = &prev->nvcsw;
 	}
 
+	schedule_balance_rt(rq, prev);
+
 	if (unlikely(!rq->nr_running))
 		idle_balance(cpu, rq);
 
Index: linux-compile.git/kernel/sched_rt.c
===================================================================
--- linux-compile.git.orig/kernel/sched_rt.c	2007-11-20 19:52:57.000000000 -0500
+++ linux-compile.git/kernel/sched_rt.c	2007-11-20 19:52:59.000000000 -0500
@@ -176,8 +176,17 @@ static void put_prev_task_rt(struct rq *
 static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
 static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 
+static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
+{
+	if (!task_running(rq, p) &&
+	    (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)))
+		return 1;
+	return 0;
+}
+
 /* Return the second highest RT task, NULL otherwise */
-static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
+static struct task_struct *pick_next_highest_task_rt(struct rq *rq,
+						     int cpu)
 {
 	struct rt_prio_array *array = &rq->rt.active;
 	struct task_struct *next;
@@ -196,26 +205,36 @@ static struct task_struct *pick_next_hig
 	}
 
 	queue = array->queue + idx;
+	BUG_ON(list_empty(queue));
+
 	next = list_entry(queue->next, struct task_struct, run_list);
-	if (unlikely(next != rq->curr))
-		return next;
+	if (unlikely(pick_rt_task(rq, next, cpu)))
+		goto out;
 
 	if (queue->next->next != queue) {
 		/* same prio task */
 		next = list_entry(queue->next->next, struct task_struct, run_list);
-		return next;
+		if (pick_rt_task(rq, next, cpu))
+			goto out;
 	}
 
+ retry:
 	/* slower, but more flexible */
 	idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
-	if (unlikely(idx >= MAX_RT_PRIO)) {
-		WARN_ON(1); /* rt_nr_running was 2 and above! */
+	if (unlikely(idx >= MAX_RT_PRIO))
 		return NULL;
-	}
 
 	queue = array->queue + idx;
-	next = list_entry(queue->next, struct task_struct, run_list);
+	BUG_ON(list_empty(queue));
+
+	list_for_each_entry(next, queue, run_list) {
+		if (pick_rt_task(rq, next, cpu))
+			goto out;
+	}
+
+	goto retry;
 
+ out:
 	return next;
 }
 
@@ -302,13 +321,15 @@ static int push_rt_task(struct rq *this_
 
 	assert_spin_locked(&this_rq->lock);
 
-	next_task = pick_next_highest_task_rt(this_rq);
+	next_task = pick_next_highest_task_rt(this_rq, -1);
 	if (!next_task)
 		return 0;
 
  retry:
-	if (unlikely(next_task == this_rq->curr))
+	if (unlikely(next_task == this_rq->curr)) {
+		WARN_ON(1);
 		return 0;
+	}
 
 	/*
 	 * It's possible that the next_task slipped in of
@@ -332,7 +353,7 @@ static int push_rt_task(struct rq *this_
 		 * so it is possible that next_task has changed.
 		 * If it has, then try again.
 		 */
-		task = pick_next_highest_task_rt(this_rq);
+		task = pick_next_highest_task_rt(this_rq, -1);
 		if (unlikely(task != next_task) && task && paranoid--) {
 			put_task_struct(next_task);
 			next_task = task;
@@ -375,6 +396,149 @@ static void push_rt_tasks(struct rq *rq)
 		;
 }
 
+static int pull_rt_task(struct rq *this_rq)
+{
+	struct task_struct *next;
+	struct task_struct *p;
+	struct rq *src_rq;
+	cpumask_t *rto_cpumask;
+	int this_cpu = this_rq->cpu;
+	int cpu;
+	int ret = 0;
+
+	assert_spin_locked(&this_rq->lock);
+
+	/*
+	 * If cpusets are used, and we have overlapping
+	 * run queue cpusets, then this algorithm may not catch all.
+	 * This is just the price you pay on trying to keep
+	 * dirtying caches down on large SMP machines.
+	 */
+	if (likely(!rt_overloaded()))
+		return 0;
+
+	next = pick_next_task_rt(this_rq);
+
+	rto_cpumask = rt_overload();
+
+	for_each_cpu_mask(cpu, *rto_cpumask) {
+		if (this_cpu == cpu)
+			continue;
+
+		src_rq = cpu_rq(cpu);
+		if (unlikely(src_rq->rt.rt_nr_running <= 1)) {
+			/*
+			 * It is possible that overlapping cpusets
+			 * will miss clearing a non overloaded runqueue.
+			 * Clear it now.
+			 */
+			if (double_lock_balance(this_rq, src_rq)) {
+				/* unlocked our runqueue lock */
+				struct task_struct *old_next = next;
+				next = pick_next_task_rt(this_rq);
+				if (next != old_next)
+					ret = 1;
+			}
+			if (likely(src_rq->rt.rt_nr_running <= 1))
+				/*
+				 * Small chance that this_rq->curr changed
+				 * but it's really harmless here.
+				 */
+				rt_clear_overload(this_rq);
+			else
+				/*
+				 * Heh, the src_rq is now overloaded, since
+				 * we already have the src_rq lock, go straight
+				 * to pulling tasks from it.
+				 */
+				goto try_pulling;
+			spin_unlock(&src_rq->lock);
+			continue;
+		}
+
+		/*
+		 * We can potentially drop this_rq's lock in
+		 * double_lock_balance, and another CPU could
+		 * steal our next task - hence we must cause
+		 * the caller to recalculate the next task
+		 * in that case:
+		 */
+		if (double_lock_balance(this_rq, src_rq)) {
+			struct task_struct *old_next = next;
+			next = pick_next_task_rt(this_rq);
+			if (next != old_next)
+				ret = 1;
+		}
+
+		/*
+		 * Are there still pullable RT tasks?
+		 */
+		if (src_rq->rt.rt_nr_running <= 1) {
+			spin_unlock(&src_rq->lock);
+			continue;
+		}
+
+ try_pulling:
+		p = pick_next_highest_task_rt(src_rq, this_cpu);
+
+		/*
+		 * Do we have an RT task that preempts
+		 * the to-be-scheduled task?
+		 */
+		if (p && (!next || (p->prio < next->prio))) {
+			WARN_ON(p == src_rq->curr);
+			WARN_ON(!p->se.on_rq);
+
+			/*
+			 * There's a chance that p is higher in priority
+			 * than what's currently running on its cpu.
+			 * This just means that p is waking up and hasn't
+			 * had a chance to schedule. We only pull
+			 * p if it is lower in priority than the
+			 * current task on the run queue or
+			 * this_rq next task is lower in prio than
+			 * the current task on that rq.
+			 */
+			if (p->prio < src_rq->curr->prio ||
+			    (next && next->prio < src_rq->curr->prio))
+				goto bail;
+
+			ret = 1;
+
+			deactivate_task(src_rq, p, 0);
+			set_task_cpu(p, this_cpu);
+			activate_task(this_rq, p, 0);
+			/*
+			 * We continue with the search, just in
+			 * case there's an even higher prio task
+			 * in another runqueue. (low likelihood
+			 * but possible)
+			 */
+
+			/*
+			 * Update next so that we won't pick a task
+			 * on another cpu with a priority lower (or equal)
+			 * than the one we just picked.
+			 */
+			next = p;
+
+		}
+ bail:
+		spin_unlock(&src_rq->lock);
+	}
+
+	return ret;
+}
+
+static void schedule_balance_rt(struct rq *rq,
+				struct task_struct *prev)
+{
+	/* Try to pull RT tasks here if we lower this rq's prio */
+	if (unlikely(rt_task(prev)) &&
+	    rq->rt.highest_prio > prev->prio)
+		pull_rt_task(rq);
+}
+
 static void schedule_tail_balance_rt(struct rq *rq)
 {
 	/*
@@ -497,6 +661,7 @@ move_one_task_rt(struct rq *this_rq, int
 }
 #else /* CONFIG_SMP */
 # define schedule_tail_balance_rt(rq)	do { } while (0)
+# define schedule_balance_rt(rq, prev)	do { } while (0)
 #endif /* CONFIG_SMP */
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p)

-- 

  parent reply	other threads:[~2007-11-21  1:22 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-11-21  1:00 [PATCH v4 00/20] New RT Balancing version 4 Steven Rostedt
2007-11-21  1:00 ` [PATCH v4 01/20] Add rt_nr_running accounting Steven Rostedt
2007-11-21  1:00 ` [PATCH v4 02/20] track highest prio queued on runqueue Steven Rostedt
2007-11-21  1:00 ` [PATCH v4 03/20] push RT tasks Steven Rostedt
2007-11-21  1:00 ` [PATCH v4 04/20] RT overloaded runqueues accounting Steven Rostedt
2007-11-21  1:00 ` Steven Rostedt [this message]
2007-11-21  1:01 ` [PATCH v4 06/20] wake up balance RT Steven Rostedt
2007-11-21  1:01 ` [PATCH v4 07/20] disable CFS RT load balancing Steven Rostedt
2007-11-21  1:01 ` [PATCH v4 08/20] Cache cpus_allowed weight for optimizing migration Steven Rostedt
2007-11-21  1:01 ` [PATCH v4 09/20] RT: Consistency cleanup for this_rq usage Steven Rostedt
2007-11-21  1:01 ` [PATCH v4 10/20] RT: Remove some CFS specific code from the wakeup path of RT tasks Steven Rostedt
2007-11-21  1:01 ` [PATCH v4 11/20] RT: Break out the search function Steven Rostedt
2007-11-21  1:01 ` [PATCH v4 12/20] RT: Allow current_cpu to be included in search Steven Rostedt
2007-11-21  1:01 ` [PATCH v4 13/20] RT: Pre-route RT tasks on wakeup Steven Rostedt
2007-11-21  1:01 ` [PATCH v4 14/20] RT: Optimize our cpu selection based on topology Steven Rostedt
2007-11-21  1:01 ` [PATCH v4 15/20] RT: Optimize rebalancing Steven Rostedt
2007-11-21  1:01 ` [PATCH v4 16/20] Avoid overload Steven Rostedt
2007-11-21  1:01 ` [PATCH v4 17/20] RT: restore the migratable conditional Steven Rostedt
2007-11-21  1:01 ` [PATCH v4 18/20] Optimize cpu search with hamming weight Steven Rostedt
2007-11-21  1:01 ` [PATCH v4 19/20] Optimize out cpu_clears Steven Rostedt
2007-11-21  2:10   ` Steven Rostedt
2007-11-21  3:10     ` [PATCH] Fix optimized search Gregory Haskins
2007-11-21  4:15       ` Steven Rostedt
2007-11-21  4:26         ` Steven Rostedt
2007-11-21  5:14           ` Gregory Haskins
2007-11-21  1:01 ` [PATCH v4 20/20] balance RT tasks no new wake up Steven Rostedt
2007-11-21  4:44 ` [PATCH 0/4] more RT balancing enhancements Gregory Haskins
2007-11-21  4:44   ` [PATCH 1/4] Fix optimized search Gregory Haskins
2007-11-21  4:44   ` [PATCH 2/4] RT: Add sched-domain roots Gregory Haskins
2007-11-21  4:44   ` [PATCH 3/4] RT: Only balance our RT tasks within our root-domain Gregory Haskins
2007-11-21  4:44   ` [PATCH 4/4] RT: Use a 2-d bitmap for searching lowest-pri CPU Gregory Haskins
2007-11-21 19:51   ` [PATCH 0/4] more RT balancing enhancements v6a Gregory Haskins
2007-11-21 19:52     ` [PATCH 1/4] SCHED: Add sched-domain roots Gregory Haskins
2007-11-21 19:52     ` [PATCH 2/4] SCHED: Track online cpus in the root-domain Gregory Haskins
2007-11-21 19:52     ` [PATCH 3/4] SCHED: Only balance our RT tasks within our root-domain Gregory Haskins
2007-11-21 19:52     ` [PATCH 4/4] SCHED: Use a 2-d bitmap for searching lowest-pri CPU Gregory Haskins

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20071121011249.662344517@goodmis.org \
    --to=rostedt@goodmis.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=clameter@sgi.com \
    --cc=ghaskins@novell.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=srostedt@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.