All of lore.kernel.org
 help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: LKML <linux-kernel@vger.kernel.org>, RT <linux-rt-users@vger.kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Ingo Molnar <mingo@elte.hu>,
	Dmitry Adamushko <dmitry.adamushko@gmail.com>,
	Paul Jackson <pj@sgi.com>, Gregory Haskins <ghaskins@novell.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH -v2 5/7] pull RT tasks
Date: Mon, 22 Oct 2007 22:59:05 -0400	[thread overview]
Message-ID: <20071023032916.808883356@goodmis.org> (raw)
In-Reply-To: 20071023025900.927578809@goodmis.org

[-- Attachment #1: rt-balance-pull-tasks.patch --]
[-- Type: text/plain, Size: 8017 bytes --]

This patch adds the algorithm to pull tasks from RT overloaded runqueues.

When a pull RT is initiated, all overloaded runqueues are examined for
a RT task that is higher in prio than the highest prio task queued on the
target runqueue. If another runqueue holds a RT task that is of higher
prio than the highest prio task on the target runqueue is found it is pulled
to the target runqueue.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>

---
 kernel/sched.c    |    2 
 kernel/sched_rt.c |  196 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 187 insertions(+), 11 deletions(-)

Index: linux-test.git/kernel/sched.c
===================================================================
--- linux-test.git.orig/kernel/sched.c	2007-10-22 22:31:59.000000000 -0400
+++ linux-test.git/kernel/sched.c	2007-10-22 22:37:28.000000000 -0400
@@ -3622,6 +3622,8 @@ need_resched_nonpreemptible:
 		switch_count = &prev->nvcsw;
 	}
 
+	schedule_balance_rt(rq, prev);
+
 	if (unlikely(!rq->nr_running))
 		idle_balance(cpu, rq);
 
Index: linux-test.git/kernel/sched_rt.c
===================================================================
--- linux-test.git.orig/kernel/sched_rt.c	2007-10-22 22:32:18.000000000 -0400
+++ linux-test.git/kernel/sched_rt.c	2007-10-22 22:37:28.000000000 -0400
@@ -168,8 +168,17 @@ static void put_prev_task_rt(struct rq *
 static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
 static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 
+static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
+{
+	if (!task_running(rq, p) &&
+	    (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)))
+		return 1;
+	return 0;
+}
+
 /* Return the second highest RT task, NULL otherwise */
-static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
+static struct task_struct *pick_next_highest_task_rt(struct rq *rq,
+						     int cpu)
 {
 	struct rt_prio_array *array = &rq->rt.active;
 	struct task_struct *next;
@@ -188,26 +197,36 @@ static struct task_struct *pick_next_hig
 	}
 
 	queue = array->queue + idx;
+	BUG_ON(list_empty(queue));
+
 	next = list_entry(queue->next, struct task_struct, run_list);
-	if (unlikely(next != rq->curr))
-		return next;
+	if (unlikely(pick_rt_task(rq, next, cpu)))
+		goto out;
 
 	if (queue->next->next != queue) {
 		/* same prio task */
 		next = list_entry(queue->next->next, struct task_struct, run_list);
-		return next;
+		if (pick_rt_task(rq, next, cpu))
+			goto out;
 	}
 
+ retry:
 	/* slower, but more flexible */
 	idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
-	if (unlikely(idx >= MAX_RT_PRIO)) {
-		WARN_ON(1); /* rt_nr_running was 2 and above! */
+	if (unlikely(idx >= MAX_RT_PRIO))
 		return NULL;
-	}
 
 	queue = array->queue + idx;
-	next = list_entry(queue->next, struct task_struct, run_list);
+	BUG_ON(list_empty(queue));
 
+	list_for_each_entry(next, queue, run_list) {
+		if (pick_rt_task(rq, next, cpu))
+			goto out;
+	}
+
+	goto retry;
+
+ out:
 	return next;
 }
 
@@ -296,13 +315,15 @@ static int push_rt_task(struct rq *this_
 
 	assert_spin_locked(&this_rq->lock);
 
-	next_task = pick_next_highest_task_rt(this_rq);
+	next_task = pick_next_highest_task_rt(this_rq, -1);
 	if (!next_task)
 		return 0;
 
  retry:
-	if (unlikely(next_task == this_rq->curr))
+	if (unlikely(next_task == this_rq->curr)) {
+		WARN_ON(1);
 		return 0;
+	}
 
 	/*
 	 * It's possible that the next_task slipped in of
@@ -326,7 +347,7 @@ static int push_rt_task(struct rq *this_
 		 * so it is possible that next_task has changed.
 		 * If it has, then try again.
 		 */
-		task = pick_next_highest_task_rt(this_rq);
+		task = pick_next_highest_task_rt(this_rq, -1);
 		if (unlikely(task != next_task) && task && paranoid--) {
 			put_task_struct(next_task);
 			next_task = task;
@@ -371,6 +392,158 @@ static void push_rt_tasks(struct rq *rq)
 		;
 }
 
+static int pull_rt_task(struct rq *this_rq)
+{
+	struct task_struct *next;
+	struct task_struct *p;
+	struct rq *src_rq;
+	cpumask_t rto_cpumask;
+	int this_cpu = this_rq->cpu;
+	int cpu;
+	int ret = 0;
+
+	assert_spin_locked(&this_rq->lock);
+
+	/*
+	 * If cpusets are used, and we have overlapping
+	 * run queue cpusets, then this algorithm may not catch all.
+	 * This is just the price you pay on trying to keep
+	 * dirtying caches down on large SMP machines.
+	 */
+	if (likely(!rt_overloaded(this_rq->curr)))
+		return 0;
+
+	next = pick_next_task_rt(this_rq);
+
+	rto_cpumask = rt_overload(this_rq->curr);
+
+	for_each_cpu_mask(cpu, rto_cpumask) {
+		if (this_cpu == cpu)
+			continue;
+
+		src_rq = cpu_rq(cpu);
+		if (unlikely(src_rq->rt.rt_nr_running <= 1)) {
+			/*
+			 * It is possible that overlapping cpusets
+			 * will miss clearing a non overloaded runqueue.
+			 * Clear it now.
+			 */
+			if (double_lock_balance(this_rq, src_rq)) {
+				/* unlocked our runqueue lock */
+				struct task_struct *old_next = next;
+				next = pick_next_task_rt(this_rq);
+				if (next != old_next)
+					ret = 1;
+			}
+			if (likely(src_rq->rt.rt_nr_running <= 1))
+				/*
+				 * Small chance that this_rq->curr changed
+				 * but it's really harmless here.
+				 */
+				rt_clear_overload(this_rq->curr, this_rq->cpu);
+			else
+				/*
+				 * Heh, the src_rq is now overloaded, since
+				 * we already have the src_rq lock, go straight
+				 * to pulling tasks from it.
+				 */
+				goto try_pulling;
+			spin_unlock(&src_rq->lock);
+			continue;
+		}
+
+		/*
+		 * We can potentially drop this_rq's lock in
+		 * double_lock_balance, and another CPU could
+		 * steal our next task - hence we must cause
+		 * the caller to recalculate the next task
+		 * in that case:
+		 */
+		if (double_lock_balance(this_rq, src_rq)) {
+			struct task_struct *old_next = next;
+			next = pick_next_task_rt(this_rq);
+			if (next != old_next)
+				ret = 1;
+		}
+
+		/*
+		 * Are there still pullable RT tasks?
+		 */
+		if (src_rq->rt.rt_nr_running <= 1) {
+			spin_unlock(&src_rq->lock);
+			continue;
+		}
+
+ try_pulling:
+		p = pick_next_highest_task_rt(src_rq, this_cpu);
+
+		/*
+		 * Do we have an RT task that preempts
+		 * the to-be-scheduled task?
+		 */
+		if (p && (!next || (p->prio < next->prio))) {
+			WARN_ON(p == src_rq->curr);
+			WARN_ON(!p->se.on_rq);
+
+			/*
+			 * There's a chance that p is higher in priority
+			 * than what's currently running on its cpu.
+			 * This is just that p is wakeing up and hasn't
+			 * had a chance to schedule. We only pull
+			 * p if it is lower in priority than the
+			 * current task on the run queue or
+			 * this_rq next task is lower in prio than
+			 * the current task on that rq.
+			 */
+			if (p->prio < src_rq->curr->prio ||
+			    (next && next->prio < src_rq->curr->prio))
+				goto bail;
+
+			ret = 1;
+
+			deactivate_task(src_rq, p, 0);
+			set_task_cpu(p, this_cpu);
+			activate_task(this_rq, p, 0);
+			/*
+			 * We continue with the search, just in
+			 * case there's an even higher prio task
+			 * in another runqueue. (low likelyhood
+			 * but possible)
+			 */
+
+			/*
+			 * Update next so that we won't pick a task
+			 * on another cpu with a priority lower (or equal)
+			 * than the one we just picked.
+			 */
+			next = p;
+
+		}
+ bail:
+		spin_unlock(&src_rq->lock);
+	}
+
+	return ret;
+}
+
+static void schedule_balance_rt(struct rq *rq,
+				struct task_struct *prev)
+{
+	struct rt_prio_array *array;
+	int next_prio;
+
+	/* Try to pull RT tasks here if we lower this rq's prio */
+	if (unlikely(rt_task(prev))) {
+		next_prio = MAX_RT_PRIO;
+		if (rq->rt.rt_nr_running) {
+			array = &rq->rt.active;
+			next_prio = sched_find_first_bit(array->bitmap);
+		}
+		if (next_prio > prev->prio)
+			pull_rt_task(rq);
+	}
+}
+
 static void schedule_tail_balance_rt(struct rq *rq)
 {
 	/*
@@ -388,6 +561,7 @@ static void schedule_tail_balance_rt(str
 }
 #else /* CONFIG_SMP */
 # define schedule_tail_balance_rt(rq)	do { } while (0)
+# define schedule_balance_rt(rq)	do { } while (0)
 #endif /* CONFIG_SMP */
 
 

-- 

  parent reply	other threads:[~2007-10-23  3:29 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-10-23  2:59 [PATCH -v2 0/7] New RT Task Balancing -v2 Steven Rostedt
2007-10-23  2:59 ` [PATCH -v2 1/7] Add rt_nr_running accounting Steven Rostedt
2007-10-23  2:59 ` [PATCH -v2 2/7] track highest prio queued on runqueue Steven Rostedt
2007-10-23  2:59 ` [PATCH -v2 3/7] push RT tasks Steven Rostedt
2007-10-23  2:59 ` [PATCH -v2 4/7] RT overloaded runqueues accounting Steven Rostedt
2007-10-23  4:17   ` Paul Jackson
2007-10-23  6:11     ` Paul Menage
2007-10-23  6:19       ` Paul Jackson
2007-10-23 13:43       ` Steven Rostedt
2007-10-23 21:46         ` Paul Jackson
2008-01-29 13:00           ` Paul Jackson
2007-10-23  2:59 ` Steven Rostedt [this message]
2007-10-23  2:59 ` [PATCH -v2 6/7] wake up balance RT Steven Rostedt
2007-10-23  2:59 ` [PATCH -v2 7/7] disable CFS RT load balancing Steven Rostedt
2007-10-23  8:39 ` [PATCH -v2 0/7] New RT Task Balancing -v2 Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20071023032916.808883356@goodmis.org \
    --to=rostedt@goodmis.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=dmitry.adamushko@gmail.com \
    --cc=ghaskins@novell.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rt-users@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=pj@sgi.com \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.