[PATCH -v2 5/7] pull RT tasks

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Steven Rostedt <rostedt@goodmis.org>
To: LKML <linux-kernel@vger.kernel.org>, RT <linux-rt-users@vger.kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Ingo Molnar <mingo@elte.hu>,
	Dmitry Adamushko <dmitry.adamushko@gmail.com>,
	Paul Jackson <pj@sgi.com>, Gregory Haskins <ghaskins@novell.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH -v2 5/7] pull RT tasks
Date: Mon, 22 Oct 2007 22:59:05 -0400	[thread overview]
Message-ID: <20071023032916.808883356@goodmis.org> (raw)
In-Reply-To: 20071023025900.927578809@goodmis.org

[-- Attachment #1: rt-balance-pull-tasks.patch --]
[-- Type: text/plain, Size: 8017 bytes --]

This patch adds the algorithm to pull tasks from RT overloaded runqueues.

When a pull RT is initiated, all overloaded runqueues are examined for
a RT task that is higher in prio than the highest prio task queued on the
target runqueue. If another runqueue holds a RT task that is of higher
prio than the highest prio task on the target runqueue is found it is pulled
to the target runqueue.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

---
 kernel/sched.c    |    2 
 kernel/sched_rt.c |  196 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 187 insertions(+), 11 deletions(-)

Index: linux-test.git/kernel/sched.c
===================================================================
--- linux-test.git.orig/kernel/sched.c	2007-10-22 22:31:59.000000000 -0400
+++ linux-test.git/kernel/sched.c	2007-10-22 22:37:28.000000000 -0400
@@ -3622,6 +3622,8 @@ need_resched_nonpreemptible:
 		switch_count = &prev->nvcsw;
 	}
 
+	schedule_balance_rt(rq, prev);
+
 	if (unlikely(!rq->nr_running))
 		idle_balance(cpu, rq);
 
Index: linux-test.git/kernel/sched_rt.c
===================================================================
--- linux-test.git.orig/kernel/sched_rt.c	2007-10-22 22:32:18.000000000 -0400
+++ linux-test.git/kernel/sched_rt.c	2007-10-22 22:37:28.000000000 -0400
@@ -168,8 +168,17 @@ static void put_prev_task_rt(struct rq *
 static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
 static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 
+static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
+{
+	if (!task_running(rq, p) &&
+	    (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)))
+		return 1;
+	return 0;
+}
+
 /* Return the second highest RT task, NULL otherwise */
-static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
+static struct task_struct *pick_next_highest_task_rt(struct rq *rq,
+						     int cpu)
 {
 	struct rt_prio_array *array = &rq->rt.active;
 	struct task_struct *next;
@@ -188,26 +197,36 @@ static struct task_struct *pick_next_hig
 	}
 
 	queue = array->queue + idx;
+	BUG_ON(list_empty(queue));
+
 	next = list_entry(queue->next, struct task_struct, run_list);
-	if (unlikely(next != rq->curr))
-		return next;
+	if (unlikely(pick_rt_task(rq, next, cpu)))
+		goto out;
 
 	if (queue->next->next != queue) {
 		/* same prio task */
 		next = list_entry(queue->next->next, struct task_struct, run_list);
-		return next;
+		if (pick_rt_task(rq, next, cpu))
+			goto out;
 	}
 
+ retry:
 	/* slower, but more flexible */
 	idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
-	if (unlikely(idx >= MAX_RT_PRIO)) {
-		WARN_ON(1); /* rt_nr_running was 2 and above! */
+	if (unlikely(idx >= MAX_RT_PRIO))
 		return NULL;
-	}
 
 	queue = array->queue + idx;
-	next = list_entry(queue->next, struct task_struct, run_list);
+	BUG_ON(list_empty(queue));
 
+	list_for_each_entry(next, queue, run_list) {
+		if (pick_rt_task(rq, next, cpu))
+			goto out;
+	}
+
+	goto retry;
+
+ out:
 	return next;
 }
 
@@ -296,13 +315,15 @@ static int push_rt_task(struct rq *this_
 
 	assert_spin_locked(&this_rq->lock);
 
-	next_task = pick_next_highest_task_rt(this_rq);
+	next_task = pick_next_highest_task_rt(this_rq, -1);
 	if (!next_task)
 		return 0;
 
  retry:
-	if (unlikely(next_task == this_rq->curr))
+	if (unlikely(next_task == this_rq->curr)) {
+		WARN_ON(1);
 		return 0;
+	}
 
 	/*
 	 * It's possible that the next_task slipped in of
@@ -326,7 +347,7 @@ static int push_rt_task(struct rq *this_
 		 * so it is possible that next_task has changed.
 		 * If it has, then try again.
 		 */
-		task = pick_next_highest_task_rt(this_rq);
+		task = pick_next_highest_task_rt(this_rq, -1);
 		if (unlikely(task != next_task) && task && paranoid--) {
 			put_task_struct(next_task);
 			next_task = task;
@@ -371,6 +392,158 @@ static void push_rt_tasks(struct rq *rq)
 		;
 }
 
+static int pull_rt_task(struct rq *this_rq)
+{
+	struct task_struct *next;
+	struct task_struct *p;
+	struct rq *src_rq;
+	cpumask_t rto_cpumask;
+	int this_cpu = this_rq->cpu;
+	int cpu;
+	int ret = 0;
+
+	assert_spin_locked(&this_rq->lock);
+
+	/*
+	 * If cpusets are used, and we have overlapping
+	 * run queue cpusets, then this algorithm may not catch all.
+	 * This is just the price you pay on trying to keep
+	 * dirtying caches down on large SMP machines.
+	 */
+	if (likely(!rt_overloaded(this_rq->curr)))
+		return 0;
+
+	next = pick_next_task_rt(this_rq);
+
+	rto_cpumask = rt_overload(this_rq->curr);
+
+	for_each_cpu_mask(cpu, rto_cpumask) {
+		if (this_cpu == cpu)
+			continue;
+
+		src_rq = cpu_rq(cpu);
+		if (unlikely(src_rq->rt.rt_nr_running <= 1)) {
+			/*
+			 * It is possible that overlapping cpusets
+			 * will miss clearing a non overloaded runqueue.
+			 * Clear it now.
+			 */
+			if (double_lock_balance(this_rq, src_rq)) {
+				/* unlocked our runqueue lock */
+				struct task_struct *old_next = next;
+				next = pick_next_task_rt(this_rq);
+				if (next != old_next)
+					ret = 1;
+			}
+			if (likely(src_rq->rt.rt_nr_running <= 1))
+				/*
+				 * Small chance that this_rq->curr changed
+				 * but it's really harmless here.
+				 */
+				rt_clear_overload(this_rq->curr, this_rq->cpu);
+			else
+				/*
+				 * Heh, the src_rq is now overloaded, since
+				 * we already have the src_rq lock, go straight
+				 * to pulling tasks from it.
+				 */
+				goto try_pulling;
+			spin_unlock(&src_rq->lock);
+			continue;
+		}
+
+		/*
+		 * We can potentially drop this_rq's lock in
+		 * double_lock_balance, and another CPU could
+		 * steal our next task - hence we must cause
+		 * the caller to recalculate the next task
+		 * in that case:
+		 */
+		if (double_lock_balance(this_rq, src_rq)) {
+			struct task_struct *old_next = next;
+			next = pick_next_task_rt(this_rq);
+			if (next != old_next)
+				ret = 1;
+		}
+
+		/*
+		 * Are there still pullable RT tasks?
+		 */
+		if (src_rq->rt.rt_nr_running <= 1) {
+			spin_unlock(&src_rq->lock);
+			continue;
+		}
+
+ try_pulling:
+		p = pick_next_highest_task_rt(src_rq, this_cpu);
+
+		/*
+		 * Do we have an RT task that preempts
+		 * the to-be-scheduled task?
+		 */
+		if (p && (!next || (p->prio < next->prio))) {
+			WARN_ON(p == src_rq->curr);
+			WARN_ON(!p->se.on_rq);
+
+			/*
+			 * There's a chance that p is higher in priority
+			 * than what's currently running on its cpu.
+			 * This is just that p is wakeing up and hasn't
+			 * had a chance to schedule. We only pull
+			 * p if it is lower in priority than the
+			 * current task on the run queue or
+			 * this_rq next task is lower in prio than
+			 * the current task on that rq.
+			 */
+			if (p->prio < src_rq->curr->prio ||
+			    (next && next->prio < src_rq->curr->prio))
+				goto bail;
+
+			ret = 1;
+
+			deactivate_task(src_rq, p, 0);
+			set_task_cpu(p, this_cpu);
+			activate_task(this_rq, p, 0);
+			/*
+			 * We continue with the search, just in
+			 * case there's an even higher prio task
+			 * in another runqueue. (low likelyhood
+			 * but possible)
+			 */
+
+			/*
+			 * Update next so that we won't pick a task
+			 * on another cpu with a priority lower (or equal)
+			 * than the one we just picked.
+			 */
+			next = p;
+
+		}
+ bail:
+		spin_unlock(&src_rq->lock);
+	}
+
+	return ret;
+}
+
+static void schedule_balance_rt(struct rq *rq,
+				struct task_struct *prev)
+{
+	struct rt_prio_array *array;
+	int next_prio;
+
+	/* Try to pull RT tasks here if we lower this rq's prio */
+	if (unlikely(rt_task(prev))) {
+		next_prio = MAX_RT_PRIO;
+		if (rq->rt.rt_nr_running) {
+			array = &rq->rt.active;
+			next_prio = sched_find_first_bit(array->bitmap);
+		}
+		if (next_prio > prev->prio)
+			pull_rt_task(rq);
+	}
+}
+
 static void schedule_tail_balance_rt(struct rq *rq)
 {
 	/*
@@ -388,6 +561,7 @@ static void schedule_tail_balance_rt(str
 }
 #else /* CONFIG_SMP */
 # define schedule_tail_balance_rt(rq)	do { } while (0)
+# define schedule_balance_rt(rq)	do { } while (0)
 #endif /* CONFIG_SMP */
 
 

--

next prev parent reply	other threads:[~2007-10-23  3:29 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-10-23  2:59 [PATCH -v2 0/7] New RT Task Balancing -v2 Steven Rostedt
2007-10-23  2:59 ` [PATCH -v2 1/7] Add rt_nr_running accounting Steven Rostedt
2007-10-23  2:59 ` [PATCH -v2 2/7] track highest prio queued on runqueue Steven Rostedt
2007-10-23  2:59 ` [PATCH -v2 3/7] push RT tasks Steven Rostedt
2007-10-23  2:59 ` [PATCH -v2 4/7] RT overloaded runqueues accounting Steven Rostedt
2007-10-23  4:17   ` Paul Jackson
2007-10-23  6:11     ` Paul Menage
2007-10-23  6:19       ` Paul Jackson
2007-10-23 13:43       ` Steven Rostedt
2007-10-23 21:46         ` Paul Jackson
2008-01-29 13:00           ` Paul Jackson
2007-10-23  2:59 ` Steven Rostedt [this message]
2007-10-23  2:59 ` [PATCH -v2 6/7] wake up balance RT Steven Rostedt
2007-10-23  2:59 ` [PATCH -v2 7/7] disable CFS RT load balancing Steven Rostedt
2007-10-23  8:39 ` [PATCH -v2 0/7] New RT Task Balancing -v2 Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20071023032916.808883356@goodmis.org \
    --to=rostedt@goodmis.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=dmitry.adamushko@gmail.com \
    --cc=ghaskins@novell.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rt-users@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=pj@sgi.com \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox