* [PATCH 1/4 v2] Replace hooks with pre/post schedule and wakeup methods
From: Steven Rostedt @ 2007-12-11 3:00 UTC (permalink / raw)
To: linux-kernel, Ingo Molnar
Cc: Dmitry Adamushko, vatsa, Balbir Singh, Peter Zijlstra,
Gregory Haskins, Steven Rostedt
[-- Attachment #1: sched-pre-post-schedule-methods.patch --]
[-- Type: text/plain, Size: 4642 bytes --]
This patch makes the main sched.c code more agnostic to the scheduling
classes. Instead of having specific hooks in the schedule code for RT
class balancing, those hooks are replaced with pre_schedule, post_schedule
and task_wake_up methods. These methods may be used by any of the classes,
but currently only the sched_rt class implements them.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
include/linux/sched.h | 3 +++
kernel/sched.c | 20 ++++++++++++++++----
kernel/sched_rt.c | 17 +++++++----------
3 files changed, 26 insertions(+), 14 deletions(-)
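As a reference for the pattern this patch introduces (optional per-class
callbacks that the core NULL-checks before invoking), here is a minimal,
compilable user-space sketch. The types and the two class instances below
are simplified stand-ins for illustration, not the kernel's own definitions:

	#include <stdio.h>

	struct rq;
	struct task_struct;

	/* Simplified stand-in for the kernel's sched_class; the new
	 * methods are optional, so callers must NULL-check them. */
	struct sched_class {
		void (*pre_schedule)(struct rq *rq, struct task_struct *prev);
		void (*post_schedule)(struct rq *rq);
		void (*task_wake_up)(struct rq *rq, struct task_struct *p);
	};

	struct task_struct {
		const struct sched_class *sched_class;
	};

	struct rq {
		int cpu;
	};

	static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
	{
		(void)rq; (void)p;
		printf("rt: check whether to push tasks after wakeup\n");
	}

	/* Only the RT class fills in the hook; others leave it NULL. */
	static const struct sched_class rt_sched_class_demo = {
		.task_wake_up = task_wake_up_rt,
	};

	static const struct sched_class fair_sched_class_demo = { 0 };

	/* Mirrors the try_to_wake_up() change: the core stays agnostic
	 * and only calls into a class that provides the method. */
	static void wake_up_hook(struct rq *rq, struct task_struct *p)
	{
		if (p->sched_class->task_wake_up)
			p->sched_class->task_wake_up(rq, p);
	}

	int main(void)
	{
		struct rq rq = { 0 };
		struct task_struct rt_task = { &rt_sched_class_demo };
		struct task_struct fair_task = { &fair_sched_class_demo };

		wake_up_hook(&rq, &rt_task);	/* hook present: fires */
		wake_up_hook(&rq, &fair_task);	/* hook NULL: skipped */
		return 0;
	}

In the kernel itself, the call sites below are additionally wrapped in
#ifdef CONFIG_SMP, since the hooks only exist in SMP builds.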
Index: linux-sched/include/linux/sched.h
===================================================================
--- linux-sched.orig/include/linux/sched.h 2007-12-10 20:39:11.000000000 -0500
+++ linux-sched/include/linux/sched.h 2007-12-10 20:39:14.000000000 -0500
@@ -848,6 +848,9 @@ struct sched_class {
int (*move_one_task) (struct rq *this_rq, int this_cpu,
struct rq *busiest, struct sched_domain *sd,
enum cpu_idle_type idle);
+ void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
+ void (*post_schedule) (struct rq *this_rq);
+ void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
#endif
void (*set_curr_task) (struct rq *rq);
Index: linux-sched/kernel/sched.c
===================================================================
--- linux-sched.orig/kernel/sched.c 2007-12-10 20:39:11.000000000 -0500
+++ linux-sched/kernel/sched.c 2007-12-10 20:39:14.000000000 -0500
@@ -1620,7 +1620,10 @@ out_activate:
out_running:
p->state = TASK_RUNNING;
- wakeup_balance_rt(rq, p);
+#ifdef CONFIG_SMP
+ if (p->sched_class->task_wake_up)
+ p->sched_class->task_wake_up(rq, p);
+#endif
out:
task_rq_unlock(rq, &flags);
@@ -1743,7 +1746,10 @@ void fastcall wake_up_new_task(struct ta
inc_nr_running(p, rq);
}
check_preempt_curr(rq, p);
- wakeup_balance_rt(rq, p);
+#ifdef CONFIG_SMP
+ if (p->sched_class->task_wake_up)
+ p->sched_class->task_wake_up(rq, p);
+#endif
task_rq_unlock(rq, &flags);
}
@@ -1864,7 +1870,10 @@ static void finish_task_switch(struct rq
prev_state = prev->state;
finish_arch_switch(prev);
finish_lock_switch(rq, prev);
- schedule_tail_balance_rt(rq);
+#ifdef CONFIG_SMP
+ if (current->sched_class->post_schedule)
+ current->sched_class->post_schedule(rq);
+#endif
fire_sched_in_preempt_notifiers(current);
if (mm)
@@ -3633,7 +3642,10 @@ need_resched_nonpreemptible:
switch_count = &prev->nvcsw;
}
- schedule_balance_rt(rq, prev);
+#ifdef CONFIG_SMP
+ if (prev->sched_class->pre_schedule)
+ prev->sched_class->pre_schedule(rq, prev);
+#endif
if (unlikely(!rq->nr_running))
idle_balance(cpu, rq);
Index: linux-sched/kernel/sched_rt.c
===================================================================
--- linux-sched.orig/kernel/sched_rt.c 2007-12-10 20:39:11.000000000 -0500
+++ linux-sched/kernel/sched_rt.c 2007-12-10 20:39:14.000000000 -0500
@@ -689,14 +689,14 @@ static int pull_rt_task(struct rq *this_
return ret;
}
-static void schedule_balance_rt(struct rq *rq, struct task_struct *prev)
+static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
{
/* Try to pull RT tasks here if we lower this rq's prio */
if (unlikely(rt_task(prev)) && rq->rt.highest_prio > prev->prio)
pull_rt_task(rq);
}
-static void schedule_tail_balance_rt(struct rq *rq)
+static void post_schedule_rt(struct rq *rq)
{
/*
* If we have more than one rt_task queued, then
@@ -713,10 +713,9 @@ static void schedule_tail_balance_rt(str
}
-static void wakeup_balance_rt(struct rq *rq, struct task_struct *p)
+static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
{
- if (unlikely(rt_task(p)) &&
- !task_running(rq, p) &&
+ if (!task_running(rq, p) &&
(p->prio >= rq->rt.highest_prio) &&
rq->rt.overloaded)
push_rt_tasks(rq);
@@ -780,11 +779,6 @@ static void leave_domain_rt(struct rq *r
if (rq->rt.overloaded)
rt_clear_overload(rq);
}
-
-#else /* CONFIG_SMP */
-# define schedule_tail_balance_rt(rq) do { } while (0)
-# define schedule_balance_rt(rq, prev) do { } while (0)
-# define wakeup_balance_rt(rq, p) do { } while (0)
#endif /* CONFIG_SMP */
static void task_tick_rt(struct rq *rq, struct task_struct *p)
@@ -838,6 +832,9 @@ const struct sched_class rt_sched_class
.set_cpus_allowed = set_cpus_allowed_rt,
.join_domain = join_domain_rt,
.leave_domain = leave_domain_rt,
+ .pre_schedule = pre_schedule_rt,
+ .post_schedule = post_schedule_rt,
+ .task_wake_up = task_wake_up_rt,
#endif
.set_curr_task = set_curr_task_rt,
--
* [PATCH 2/4 v2] added methods for sched_class changes
From: Steven Rostedt @ 2007-12-11 3:00 UTC (permalink / raw)
To: linux-kernel, Ingo Molnar
Cc: Dmitry Adamushko, vatsa, Balbir Singh, Peter Zijlstra,
Gregory Haskins, Steven Rostedt
[-- Attachment #1: sched-handle-class-switch.patch --]
[-- Type: text/plain, Size: 10154 bytes --]
Dmitry Adamushko found that the current implementation of the RT
balancing code left out changes to sched_setscheduler and rt_mutex_setprio.
This patch addresses that by adding methods to the scheduling classes to
handle a task being switched out of (switched_from) and switched into
(switched_to) a sched_class, along with a method for priority changes
(prio_changed).
This patch also removes some duplicate logic between rt_mutex_setprio and
sched_setscheduler.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
---
include/linux/sched.h | 7 +++
kernel/sched.c | 42 ++++++++++------------
kernel/sched_fair.c | 39 +++++++++++++++++++++
kernel/sched_idletask.c | 31 ++++++++++++++++
kernel/sched_rt.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 186 insertions(+), 22 deletions(-)
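For reference, here is a minimal compilable user-space sketch of the new
check_class_changed() dispatch. The types and the demo classes (class_a,
class_b) are simplified stand-ins for illustration; the dispatch rule
matches the patch: switched_from is optional and NULL-checked, while
switched_to and prio_changed are expected from every class:

	#include <stdio.h>

	struct rq {
		int cpu;
	};
	struct task_struct;

	struct sched_class {
		void (*switched_from)(struct rq *rq, struct task_struct *p,
				      int running);
		void (*switched_to)(struct rq *rq, struct task_struct *p,
				    int running);
		void (*prio_changed)(struct rq *rq, struct task_struct *p,
				     int oldprio, int running);
	};

	struct task_struct {
		const struct sched_class *sched_class;
		int prio;
	};

	/* Same shape as the helper added to sched.c: one place decides
	 * between class-switch and priority-change notifications. */
	static void check_class_changed(struct rq *rq, struct task_struct *p,
					const struct sched_class *prev_class,
					int oldprio, int running)
	{
		if (prev_class != p->sched_class) {
			if (prev_class->switched_from)
				prev_class->switched_from(rq, p, running);
			p->sched_class->switched_to(rq, p, running);
		} else
			p->sched_class->prio_changed(rq, p, oldprio, running);
	}

	static void switched_to_demo(struct rq *rq, struct task_struct *p,
				     int running)
	{
		printf("switched_to: running=%d\n", running);
	}

	static void prio_changed_demo(struct rq *rq, struct task_struct *p,
				      int oldprio, int running)
	{
		printf("prio_changed: %d -> %d\n", oldprio, p->prio);
	}

	static const struct sched_class class_a = {
		.switched_to = switched_to_demo,
		.prio_changed = prio_changed_demo,
	};
	static const struct sched_class class_b = {
		.switched_to = switched_to_demo,
		.prio_changed = prio_changed_demo,
	};

	int main(void)
	{
		struct rq rq = { 0 };
		struct task_struct p = { &class_a, 120 };

		/* Same class, priority changed: prio_changed fires. */
		p.prio = 100;
		check_class_changed(&rq, &p, &class_a, 120, 0);

		/* Class changed: switched_from (if set), then switched_to. */
		p.sched_class = &class_b;
		check_class_changed(&rq, &p, &class_a, 100, 0);
		return 0;
	}

Only the class-switch path needs the old class at all, which is why
rt_mutex_setprio and sched_setscheduler now just record prev_class and
defer the rescheduling decision to this helper.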
Index: linux-sched/include/linux/sched.h
===================================================================
--- linux-sched.orig/include/linux/sched.h 2007-12-10 20:39:14.000000000 -0500
+++ linux-sched/include/linux/sched.h 2007-12-10 20:39:17.000000000 -0500
@@ -860,6 +860,13 @@ struct sched_class {
void (*join_domain)(struct rq *rq);
void (*leave_domain)(struct rq *rq);
+
+ void (*switched_from) (struct rq *this_rq, struct task_struct *task,
+ int running);
+ void (*switched_to) (struct rq *this_rq, struct task_struct *task,
+ int running);
+ void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
+ int oldprio, int running);
};
struct load_weight {
Index: linux-sched/kernel/sched.c
===================================================================
--- linux-sched.orig/kernel/sched.c 2007-12-10 20:39:14.000000000 -0500
+++ linux-sched/kernel/sched.c 2007-12-10 20:39:17.000000000 -0500
@@ -1147,6 +1147,18 @@ static inline void __set_task_cpu(struct
#endif
}
+static inline void check_class_changed(struct rq *rq, struct task_struct *p,
+ const struct sched_class *prev_class,
+ int oldprio, int running)
+{
+ if (prev_class != p->sched_class) {
+ if (prev_class->switched_from)
+ prev_class->switched_from(rq, p, running);
+ p->sched_class->switched_to(rq, p, running);
+ } else
+ p->sched_class->prio_changed(rq, p, oldprio, running);
+}
+
#ifdef CONFIG_SMP
/*
@@ -4012,6 +4024,7 @@ void rt_mutex_setprio(struct task_struct
unsigned long flags;
int oldprio, on_rq, running;
struct rq *rq;
+ const struct sched_class *prev_class = p->sched_class;
BUG_ON(prio < 0 || prio > MAX_PRIO);
@@ -4037,18 +4050,10 @@ void rt_mutex_setprio(struct task_struct
if (on_rq) {
if (running)
p->sched_class->set_curr_task(rq);
+
enqueue_task(rq, p, 0);
- /*
- * Reschedule if we are currently running on this runqueue and
- * our priority decreased, or if we are not currently running on
- * this runqueue and our priority is higher than the current's
- */
- if (running) {
- if (p->prio > oldprio)
- resched_task(rq->curr);
- } else {
- check_preempt_curr(rq, p);
- }
+
+ check_class_changed(rq, p, prev_class, oldprio, running);
}
task_rq_unlock(rq, &flags);
}
@@ -4248,6 +4253,7 @@ int sched_setscheduler(struct task_struc
{
int retval, oldprio, oldpolicy = -1, on_rq, running;
unsigned long flags;
+ const struct sched_class *prev_class = p->sched_class;
struct rq *rq;
/* may grab non-irq protected spin_locks */
@@ -4341,18 +4347,10 @@ recheck:
if (on_rq) {
if (running)
p->sched_class->set_curr_task(rq);
+
activate_task(rq, p, 0);
- /*
- * Reschedule if we are currently running on this runqueue and
- * our priority decreased, or if we are not currently running on
- * this runqueue and our priority is higher than the current's
- */
- if (running) {
- if (p->prio > oldprio)
- resched_task(rq->curr);
- } else {
- check_preempt_curr(rq, p);
- }
+
+ check_class_changed(rq, p, prev_class, oldprio, running);
}
__task_rq_unlock(rq);
spin_unlock_irqrestore(&p->pi_lock, flags);
Index: linux-sched/kernel/sched_fair.c
===================================================================
--- linux-sched.orig/kernel/sched_fair.c 2007-12-10 20:39:11.000000000 -0500
+++ linux-sched/kernel/sched_fair.c 2007-12-10 20:39:17.000000000 -0500
@@ -1280,6 +1280,42 @@ static void task_new_fair(struct rq *rq,
resched_task(rq->curr);
}
+/*
+ * Priority of the task has changed. Check to see if we preempt
+ * the current task.
+ */
+static void prio_changed_fair(struct rq *rq, struct task_struct *p,
+ int oldprio, int running)
+{
+ /*
+ * Reschedule if we are currently running on this runqueue and
+ * our priority decreased, or if we are not currently running on
+ * this runqueue and our priority is higher than the current's
+ */
+ if (running) {
+ if (p->prio > oldprio)
+ resched_task(rq->curr);
+ } else
+ check_preempt_curr(rq, p);
+}
+
+/*
+ * We switched to the sched_fair class.
+ */
+static void switched_to_fair(struct rq *rq, struct task_struct *p,
+ int running)
+{
+ /*
+ * We were most likely switched from sched_rt, so
+ * kick off the schedule if running, otherwise just see
+ * if we can still preempt the current task.
+ */
+ if (running)
+ resched_task(rq->curr);
+ else
+ check_preempt_curr(rq, p);
+}
+
/* Account for a task changing its policy or group.
*
* This routine is mostly called to set cfs_rq->curr field when a task
@@ -1318,6 +1354,9 @@ static const struct sched_class fair_sch
.set_curr_task = set_curr_task_fair,
.task_tick = task_tick_fair,
.task_new = task_new_fair,
+
+ .prio_changed = prio_changed_fair,
+ .switched_to = switched_to_fair,
};
#ifdef CONFIG_SCHED_DEBUG
Index: linux-sched/kernel/sched_idletask.c
===================================================================
--- linux-sched.orig/kernel/sched_idletask.c 2007-12-10 20:39:11.000000000 -0500
+++ linux-sched/kernel/sched_idletask.c 2007-12-10 20:39:17.000000000 -0500
@@ -69,6 +69,33 @@ static void set_curr_task_idle(struct rq
{
}
+static void switched_to_idle(struct rq *rq, struct task_struct *p,
+ int running)
+{
+ /* Can this actually happen?? */
+ if (running)
+ resched_task(rq->curr);
+ else
+ check_preempt_curr(rq, p);
+}
+
+static void prio_changed_idle(struct rq *rq, struct task_struct *p,
+ int oldprio, int running)
+{
+ /* This can happen for hot plug CPUS */
+
+ /*
+ * Reschedule if we are currently running on this runqueue and
+ * our priority decreased, or if we are not currently running on
+ * this runqueue and our priority is higher than the current's
+ */
+ if (running) {
+ if (p->prio > oldprio)
+ resched_task(rq->curr);
+ } else
+ check_preempt_curr(rq, p);
+}
+
/*
* Simple, special scheduling class for the per-CPU idle tasks:
*/
@@ -94,5 +121,9 @@ const struct sched_class idle_sched_clas
.set_curr_task = set_curr_task_idle,
.task_tick = task_tick_idle,
+
+ .prio_changed = prio_changed_idle,
+ .switched_to = switched_to_idle,
+
/* no .task_new for idle tasks */
};
Index: linux-sched/kernel/sched_rt.c
===================================================================
--- linux-sched.orig/kernel/sched_rt.c 2007-12-10 20:39:14.000000000 -0500
+++ linux-sched/kernel/sched_rt.c 2007-12-10 20:39:17.000000000 -0500
@@ -779,7 +779,92 @@ static void leave_domain_rt(struct rq *r
if (rq->rt.overloaded)
rt_clear_overload(rq);
}
+
+/*
+ * When switching from the rt queue, we bring ourselves to a position
+ * where we might want to pull RT tasks from other runqueues.
+ */
+static void switched_from_rt(struct rq *rq, struct task_struct *p,
+ int running)
+{
+ /*
+ * If there are other RT tasks then we will reschedule
+ * and the scheduling of the other RT tasks will handle
+ * the balancing. But if we are the last RT task
+ * we may need to handle the pulling of RT tasks
+ * now.
+ */
+ if (!rq->rt.rt_nr_running)
+ pull_rt_task(rq);
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * When switching a task to RT, we may overload the runqueue
+ * with RT tasks. In this case we try to push them off to
+ * other runqueues.
+ */
+static void switched_to_rt(struct rq *rq, struct task_struct *p,
+ int running)
+{
+ int check_resched = 1;
+
+ /*
+ * If we are already running, then there's nothing
+ * that needs to be done. But if we are not running
+ * we may need to preempt the current running task.
+ * If that current running task is also an RT task
+ * then see if we can move to another run queue.
+ */
+ if (!running) {
+#ifdef CONFIG_SMP
+ if (rq->rt.overloaded && push_rt_task(rq) &&
+ /* Don't resched if we changed runqueues */
+ rq != task_rq(p))
+ check_resched = 0;
#endif /* CONFIG_SMP */
+ if (check_resched && p->prio < rq->curr->prio)
+ resched_task(rq->curr);
+ }
+}
+
+/*
+ * Priority of the task has changed. This may cause
+ * us to initiate a push or pull.
+ */
+static void prio_changed_rt(struct rq *rq, struct task_struct *p,
+ int oldprio, int running)
+{
+ if (running) {
+#ifdef CONFIG_SMP
+ /*
+ * If our priority decreases while running, we
+ * may need to pull tasks to this runqueue.
+ */
+ if (oldprio < p->prio)
+ pull_rt_task(rq);
+ /*
+ * If there's a higher priority task waiting to run
+ * then reschedule.
+ */
+ if (p->prio > rq->rt.highest_prio)
+ resched_task(p);
+#else
+ /* For UP simply resched on drop of prio */
+ if (oldprio < p->prio)
+ resched_task(p);
+#endif /* CONFIG_SMP */
+ } else {
+ /*
+ * This task is not running, but if it is
+ * greater than the current running task
+ * then reschedule.
+ */
+ if (p->prio < rq->curr->prio)
+ resched_task(rq->curr);
+ }
+}
+
static void task_tick_rt(struct rq *rq, struct task_struct *p)
{
@@ -835,8 +920,12 @@ const struct sched_class rt_sched_class
.pre_schedule = pre_schedule_rt,
.post_schedule = post_schedule_rt,
.task_wake_up = task_wake_up_rt,
+ .switched_from = switched_from_rt,
#endif
.set_curr_task = set_curr_task_rt,
.task_tick = task_tick_rt,
+
+ .prio_changed = prio_changed_rt,
+ .switched_to = switched_to_rt,
};
--