* [RFC][PATCH] fix SCHED_FIFO spec violation (backport)
@ 2008-07-03 22:41 Darren Hart
2008-07-04 13:08 ` John Kacur
2008-07-15 10:01 ` Peter Zijlstra
0 siblings, 2 replies; 8+ messages in thread
From: Darren Hart @ 2008-07-03 22:41 UTC (permalink / raw)
To: linux-rt-users
Cc: Steven Rostedt, Peter Zijlstra, Clark Williams, Ingo Molnar,
Thomas Gleixner, Dmitry Adamushko, Gregory Haskins
Enqueue deprioritized RT tasks to head of prio array
This patch backports Peter Z's enqueue to head of prio array on
de-prioritization to 2.6.24.7-rt14 which doesn't have the
enqueue_rt_entity and associated changes.
I've run several long running real-time java benchmarks and it's
holding so far. Steven, please consider this patch for inclusion
in the next 2.6.24.7-rtX release.
Peter, I didn't include your Signed-off-by as only about half your
original patch applied to 2.6.24.7-rt14. If you're happy with this
version, would you also sign off?
Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
---
Index: linux-2.6.24.7-ibmrt2.6-view/include/linux/sched.h
===================================================================
--- linux-2.6.24.7-ibmrt2.6-view.orig/include/linux/sched.h
+++ linux-2.6.24.7-ibmrt2.6-view/include/linux/sched.h
@@ -897,11 +897,16 @@ struct uts_namespace;
struct rq;
struct sched_domain;
+#define ENQUEUE_WAKEUP 0x01
+#define ENQUEUE_HEAD 0x02
+
+#define DEQUEUE_SLEEP 0x01
+
struct sched_class {
const struct sched_class *next;
- void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
- void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
+ void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
+ void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*yield_task) (struct rq *rq);
int (*select_task_rq)(struct task_struct *p, int sync);
Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched.c
===================================================================
--- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched.c
+++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched.c
@@ -1046,7 +1046,7 @@ static const u32 prio_to_wmult[40] = {
/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};
-static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
+static void activate_task(struct rq *rq, struct task_struct *p, int flags);
/*
* runqueue iterator, to support SMP load-balancing between different
@@ -1155,16 +1155,16 @@ static void set_load_weight(struct task_
p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
}
-static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
+static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
{
sched_info_queued(p);
- p->sched_class->enqueue_task(rq, p, wakeup);
+ p->sched_class->enqueue_task(rq, p, flags);
p->se.on_rq = 1;
}
-static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
+static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
{
- p->sched_class->dequeue_task(rq, p, sleep);
+ p->sched_class->dequeue_task(rq, p, flags);
p->se.on_rq = 0;
}
@@ -1219,26 +1219,26 @@ static int effective_prio(struct task_st
/*
* activate_task - move a task to the runqueue.
*/
-static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
+static void activate_task(struct rq *rq, struct task_struct *p, int flags)
{
if (p->state == TASK_UNINTERRUPTIBLE)
rq->nr_uninterruptible--;
ftrace_event_task_activate(p, cpu_of(rq));
- enqueue_task(rq, p, wakeup);
+ enqueue_task(rq, p, flags);
inc_nr_running(p, rq);
}
/*
* deactivate_task - remove a task from the runqueue.
*/
-static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
+static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
{
if (p->state == TASK_UNINTERRUPTIBLE)
rq->nr_uninterruptible++;
ftrace_event_task_deactivate(p, cpu_of(rq));
- dequeue_task(rq, p, sleep);
+ dequeue_task(rq, p, flags);
dec_nr_running(p, rq);
}
@@ -1759,7 +1759,7 @@ out_activate:
else
schedstat_inc(p, se.nr_wakeups_remote);
update_rq_clock(rq);
- activate_task(rq, p, 1);
+ activate_task(rq, p, ENQUEUE_WAKEUP);
check_preempt_curr(rq, p);
success = 1;
@@ -3968,7 +3968,7 @@ asmlinkage void __sched __schedule(void)
prev->state = TASK_RUNNING;
} else {
touch_softlockup_watchdog();
- deactivate_task(rq, prev, 1);
+ deactivate_task(rq, prev, DEQUEUE_SLEEP);
}
switch_count = &prev->nvcsw;
}
@@ -4431,7 +4431,7 @@ EXPORT_SYMBOL(sleep_on_timeout);
void task_setprio(struct task_struct *p, int prio)
{
unsigned long flags;
- int oldprio, prev_resched, on_rq, running;
+ int oldprio, prev_resched, on_rq, running, down;
struct rq *rq;
const struct sched_class *prev_class = p->sched_class;
@@ -4472,6 +4472,7 @@ void task_setprio(struct task_struct *p,
else
p->sched_class = &fair_sched_class;
+ down = (prio > p->prio) ? ENQUEUE_HEAD : 0;
p->prio = prio;
// trace_special_pid(p->pid, __PRIO(oldprio), PRIO(p));
@@ -4480,7 +4481,7 @@ void task_setprio(struct task_struct *p,
if (running)
p->sched_class->set_curr_task(rq);
if (on_rq) {
- enqueue_task(rq, p, 0);
+ enqueue_task(rq, p, down);
check_class_changed(rq, p, prev_class, oldprio, running);
}
// trace_special(prev_resched, _need_resched(), 0);
Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched_fair.c
===================================================================
--- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched_fair.c
+++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched_fair.c
@@ -756,10 +756,11 @@ static inline struct sched_entity *paren
* increased. Here we update the fair scheduling stats and
* then put the task into the rbtree:
*/
-static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
+static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
+ int wakeup = flags & ENQUEUE_WAKEUP;
for_each_sched_entity(se) {
if (se->on_rq)
@@ -775,10 +776,11 @@ static void enqueue_task_fair(struct rq
* decreased. We remove the task from the rbtree and
* update the fair scheduling stats:
*/
-static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
+static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
+ int sleep = flags & DEQUEUE_SLEEP;
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched_idletask.c
===================================================================
--- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched_idletask.c
+++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched_idletask.c
@@ -31,7 +31,7 @@ static struct task_struct *pick_next_tas
* message if some code attempts to do it:
*/
static void
-dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
+dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
{
spin_unlock_irq(&rq->lock);
printk(KERN_ERR "bad: scheduling from the idle thread!\n");
Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched_rt.c
===================================================================
--- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched_rt.c
+++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched_rt.c
@@ -181,11 +181,16 @@ unsigned long rt_nr_uninterruptible_cpu(
return cpu_rq(cpu)->rt.rt_nr_uninterruptible;
}
-static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
+static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
struct rt_prio_array *array = &rq->rt.active;
- list_add_tail(&p->run_list, array->queue + p->prio);
+
+ if (unlikely(flags & ENQUEUE_HEAD))
+ list_add(&p->run_list, array->queue + p->prio);
+ else
+ list_add_tail(&p->run_list, array->queue + p->prio);
+
__set_bit(p->prio, array->bitmap);
inc_rt_tasks(p, rq);
@@ -196,7 +201,7 @@ static void enqueue_task_rt(struct rq *r
/*
* Adding/removing a task to/from a priority array:
*/
-static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
+static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
struct rt_prio_array *array = &rq->rt.active;
@@ -306,7 +311,7 @@ static void put_prev_task_rt(struct rq *
#define RT_MAX_TRIES 3
static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
-static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
+static void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
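
For reference, the POSIX rule at stake (SUSv3, SCHED_FIFO, for
pthread_setschedprio()): if the priority is lowered, the thread becomes
the head of the thread list for its new priority. Below is a minimal
userspace sketch of that rule (illustrative only, not part of the patch;
the priority values, the file name, and the single-CPU assumption are
mine, and I have not verified that pthread_setschedprio() reaches the
patched task_setprio() path on every libc/kernel combination). Note
that in the kernel a numerically larger prio means a lower priority, so
the patch's "down = (prio > p->prio) ? ENQUEUE_HEAD : 0" selects head
enqueueing exactly when a task is being deprioritized.

	/*
	 * fifo-head.c: a SCHED_FIFO thread that lowers its own priority
	 * should go to the HEAD of the new priority's run list and keep
	 * running ahead of threads already queued at that priority.
	 * Build: gcc -o fifo-head fifo-head.c -lpthread
	 */
	#include <pthread.h>
	#include <sched.h>
	#include <stdio.h>

	static void *spinner(void *arg)
	{
		(void)arg;
		for (;;)
			;	/* runnable at prio 40; should stay queued behind main */
		return NULL;
	}

	int main(void)
	{
		struct sched_param sp = { .sched_priority = 50 };
		pthread_attr_t attr;
		pthread_t t;

		/* Run ourselves at FIFO prio 50. */
		if (sched_setscheduler(0, SCHED_FIFO, &sp)) {
			perror("sched_setscheduler");
			return 1;
		}

		/* Start a FIFO prio 40 busy loop; it cannot preempt us at 50. */
		pthread_attr_init(&attr);
		pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
		pthread_attr_setschedpolicy(&attr, SCHED_FIFO);
		sp.sched_priority = 40;
		pthread_attr_setschedparam(&attr, &sp);
		pthread_create(&t, &attr, spinner, NULL);

		/*
		 * Deprioritize ourselves to 40. With head enqueueing we stay
		 * on the CPU and the message prints; with the old tail
		 * enqueueing the spinner runs first and, being SCHED_FIFO,
		 * never lets us back on.
		 */
		pthread_setschedprio(pthread_self(), 40);
		printf("still running after deprioritization\n");
		return 0;
	}

Run it as root pinned to one CPU, e.g. "taskset -c 0 ./fifo-head": with
head enqueueing the message prints; with tail enqueueing the
equal-priority spinner takes over and the program hangs.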
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC][PATCH] fix SCHED_FIFO spec violation (backport)
2008-07-03 22:41 [RFC][PATCH] fix SCHED_FIFO spec violation (backport) Darren Hart
@ 2008-07-04 13:08 ` John Kacur
2008-07-05 15:18 ` Darren Hart
2008-07-15 10:01 ` Peter Zijlstra
1 sibling, 1 reply; 8+ messages in thread
From: John Kacur @ 2008-07-04 13:08 UTC (permalink / raw)
To: Darren Hart
Cc: linux-rt-users, Steven Rostedt, Peter Zijlstra, Clark Williams,
Ingo Molnar, Thomas Gleixner, Dmitry Adamushko, Gregory Haskins
Since we're always being admonished to do code-review, and I needed to
waste some time, here are a few comments on what looks largely like a
nice patch to me.
On Fri, Jul 4, 2008 at 12:41 AM, Darren Hart <dvhltc@us.ibm.com> wrote:
> Enqueue deprioritized RT tasks to head of prio array
>
> This patch backports Peter Z's enqueue to head of prio array on
> de-prioritization to 2.6.24.7-rt14 which doesn't have the
> enqueue_rt_entity and associated changes.
>
> I've run several long running real-time java benchmarks and it's
> holding so far. Steven, please consider this patch for inclusion
> in the next 2.6.24.7-rtX release.
>
> Peter, I didn't include your Signed-off-by as only about half your
> original patch applied to 2.6.24.7-rt14. If you're happy with this
> version, would you also sign off?
>
> Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
>
>
> ---
> Index: linux-2.6.24.7-ibmrt2.6-view/include/linux/sched.h
> ===================================================================
> --- linux-2.6.24.7-ibmrt2.6-view.orig/include/linux/sched.h
> +++ linux-2.6.24.7-ibmrt2.6-view/include/linux/sched.h
> @@ -897,11 +897,16 @@ struct uts_namespace;
> struct rq;
> struct sched_domain;
>
> +#define ENQUEUE_WAKEUP 0x01
> +#define ENQUEUE_HEAD 0x02
> +
> +#define DEQUEUE_SLEEP 0x01
> +
Question: is ENQUEUE_WAKEUP equal to DEQUEUE_SLEEP by design or
coincidence? The renaming of wakeup and sleep to flags makes it at
least superficially seem like they overlap. Since a large part of the
patch is renaming, it might be easier to understand if the renaming
was done as a separate patch, but on the other hand, that is probably
just a PITA. :)
> struct sched_class {
> const struct sched_class *next;
>
> - void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
> - void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
> + void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
> + void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
> void (*yield_task) (struct rq *rq);
> int (*select_task_rq)(struct task_struct *p, int sync);
>
> Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched.c
> ===================================================================
> --- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched.c
> +++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched.c
> @@ -1046,7 +1046,7 @@ static const u32 prio_to_wmult[40] = {
> /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
> };
>
> -static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
> +static void activate_task(struct rq *rq, struct task_struct *p, int flags);
>
> /*
> * runqueue iterator, to support SMP load-balancing between different
> @@ -1155,16 +1155,16 @@ static void set_load_weight(struct task_
> p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
> }
>
> -static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
> +static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> {
> sched_info_queued(p);
> - p->sched_class->enqueue_task(rq, p, wakeup);
> + p->sched_class->enqueue_task(rq, p, flags);
> p->se.on_rq = 1;
> }
>
> -static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
> +static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
> {
> - p->sched_class->dequeue_task(rq, p, sleep);
> + p->sched_class->dequeue_task(rq, p, flags);
> p->se.on_rq = 0;
> }
>
> @@ -1219,26 +1219,26 @@ static int effective_prio(struct task_st
> /*
> * activate_task - move a task to the runqueue.
> */
> -static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
> +static void activate_task(struct rq *rq, struct task_struct *p, int flags)
> {
> if (p->state == TASK_UNINTERRUPTIBLE)
> rq->nr_uninterruptible--;
>
> ftrace_event_task_activate(p, cpu_of(rq));
> - enqueue_task(rq, p, wakeup);
> + enqueue_task(rq, p, flags);
> inc_nr_running(p, rq);
> }
>
> /*
> * deactivate_task - remove a task from the runqueue.
> */
> -static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
> +static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
> {
> if (p->state == TASK_UNINTERRUPTIBLE)
> rq->nr_uninterruptible++;
>
> ftrace_event_task_deactivate(p, cpu_of(rq));
> - dequeue_task(rq, p, sleep);
> + dequeue_task(rq, p, flags);
> dec_nr_running(p, rq);
> }
>
> @@ -1759,7 +1759,7 @@ out_activate:
> else
> schedstat_inc(p, se.nr_wakeups_remote);
> update_rq_clock(rq);
> - activate_task(rq, p, 1);
> + activate_task(rq, p, ENQUEUE_WAKEUP);
> check_preempt_curr(rq, p);
> success = 1;
>
> @@ -3968,7 +3968,7 @@ asmlinkage void __sched __schedule(void)
> prev->state = TASK_RUNNING;
> } else {
> touch_softlockup_watchdog();
> - deactivate_task(rq, prev, 1);
> + deactivate_task(rq, prev, DEQUEUE_SLEEP);
> }
> switch_count = &prev->nvcsw;
> }
> @@ -4431,7 +4431,7 @@ EXPORT_SYMBOL(sleep_on_timeout);
> void task_setprio(struct task_struct *p, int prio)
> {
> unsigned long flags;
> - int oldprio, prev_resched, on_rq, running;
> + int oldprio, prev_resched, on_rq, running, down;
> struct rq *rq;
> const struct sched_class *prev_class = p->sched_class;
>
> @@ -4472,6 +4472,7 @@ void task_setprio(struct task_struct *p,
> else
> p->sched_class = &fair_sched_class;
>
> + down = (prio > p->prio) ? ENQUEUE_HEAD : 0;
> p->prio = prio;
>
> // trace_special_pid(p->pid, __PRIO(oldprio), PRIO(p));
> @@ -4480,7 +4481,7 @@ void task_setprio(struct task_struct *p,
> if (running)
> p->sched_class->set_curr_task(rq);
> if (on_rq) {
> - enqueue_task(rq, p, 0);
> + enqueue_task(rq, p, down);
> check_class_changed(rq, p, prev_class, oldprio, running);
> }
> // trace_special(prev_resched, _need_resched(), 0);
> Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched_fair.c
> ===================================================================
> --- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched_fair.c
> +++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched_fair.c
> @@ -756,10 +756,11 @@ static inline struct sched_entity *paren
> * increased. Here we update the fair scheduling stats and
> * then put the task into the rbtree:
> */
> -static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
> +static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> {
> struct cfs_rq *cfs_rq;
> struct sched_entity *se = &p->se;
> + int wakeup = flags & ENQUEUE_WAKEUP;
Minor nit: was it necessary to create a new int? Why not just flags &=
ENQUEUE_WAKEUP, plus subsequent renaming where necessary?
>
> for_each_sched_entity(se) {
> if (se->on_rq)
> @@ -775,10 +776,11 @@ static void enqueue_task_fair(struct rq
> * decreased. We remove the task from the rbtree and
> * update the fair scheduling stats:
> */
> -static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
> +static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> {
> struct cfs_rq *cfs_rq;
> struct sched_entity *se = &p->se;
> + int sleep = flags & DEQUEUE_SLEEP;
>
> for_each_sched_entity(se) {
> cfs_rq = cfs_rq_of(se);
> Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched_idletask.c
> ===================================================================
> --- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched_idletask.c
> +++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched_idletask.c
> @@ -31,7 +31,7 @@ static struct task_struct *pick_next_tas
> * message if some code attempts to do it:
> */
> static void
> -dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
> +dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
> {
> spin_unlock_irq(&rq->lock);
> printk(KERN_ERR "bad: scheduling from the idle thread!\n");
> Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched_rt.c
> ===================================================================
> --- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched_rt.c
> +++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched_rt.c
> @@ -181,11 +181,16 @@ unsigned long rt_nr_uninterruptible_cpu(
> return cpu_rq(cpu)->rt.rt_nr_uninterruptible;
> }
>
> -static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
> +static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
> {
> struct rt_prio_array *array = &rq->rt.active;
>
> - list_add_tail(&p->run_list, array->queue + p->prio);
> +
> + if (unlikely(flags & ENQUEUE_HEAD))
> + list_add(&p->run_list, array->queue + p->prio);
> + else
> + list_add_tail(&p->run_list, array->queue + p->prio);
> +
> __set_bit(p->prio, array->bitmap);
> inc_rt_tasks(p, rq);
>
> @@ -196,7 +201,7 @@ static void enqueue_task_rt(struct rq *r
> /*
> * Adding/removing a task to/from a priority array:
> */
> -static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
> +static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
> {
> struct rt_prio_array *array = &rq->rt.active;
>
> @@ -306,7 +311,7 @@ static void put_prev_task_rt(struct rq *
> #define RT_MAX_TRIES 3
>
> static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
> -static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
> +static void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
>
> static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
> {
>
>
Lastly, this patch didn't apply cleanly for me! It is very possible
that I'm just doing something stupid, but here is what happened when I
tried.
tar xjf ../kernel/linux-2.6.24.tar.bz2
cd linux-2.6.24
bunzip2 -c ../../kernel/patch-2.6.24.7.bz2 | patch -p1
bunzip2 -c ../patch-2.6.24.7-rt14.bz2 | patch -p1
patch -p1 < ../dvhart.patch
patching file include/linux/sched.h
Hunk #1 FAILED at 897.
1 out of 1 hunk FAILED -- saving rejects to file include/linux/sched.h.rej
patching file kernel/sched.c
Hunk #1 succeeded at 1046 with fuzz 1.
Hunk #2 FAILED at 1155.
Hunk #3 FAILED at 1219.
Hunk #4 FAILED at 1759.
Hunk #5 FAILED at 3968.
Hunk #6 FAILED at 4431.
Hunk #7 FAILED at 4472.
Hunk #8 FAILED at 4481.
7 out of 8 hunks FAILED -- saving rejects to file kernel/sched.c.rej
patching file kernel/sched_fair.c
Hunk #1 FAILED at 756.
Hunk #2 FAILED at 776.
2 out of 2 hunks FAILED -- saving rejects to file kernel/sched_fair.c.rej
patching file kernel/sched_idletask.c
Hunk #1 succeeded at 31 with fuzz 2.
patching file kernel/sched_rt.c
Hunk #1 FAILED at 181.
Hunk #2 FAILED at 201.
2 out of 3 hunks FAILED -- saving rejects to file kernel/sched_rt.c.rej
Thanks
John
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC][PATCH] fix SCHED_FIFO spec violation (backport)
2008-07-04 13:08 ` John Kacur
@ 2008-07-05 15:18 ` Darren Hart
2008-07-07 11:00 ` John Kacur
2008-07-15 8:03 ` Peter Zijlstra
0 siblings, 2 replies; 8+ messages in thread
From: Darren Hart @ 2008-07-05 15:18 UTC (permalink / raw)
To: John Kacur
Cc: linux-rt-users, Steven Rostedt, Peter Zijlstra, Clark Williams,
Ingo Molnar, Thomas Gleixner, Dmitry Adamushko, Gregory Haskins
[-- Attachment #1: Type: text/plain, Size: 14586 bytes --]
On Fri, 2008-07-04 at 15:08 +0200, John Kacur wrote:
> Since we're always being admonished to do code-review, and I needed to
> waste some time, here are a few comments on what looks largely like a
> nice patch to me.
Hi John, thanks for the review. Note that this is a backport of Peter's
git patch, so I'll try to address some of your rationale questions
below, but the short answer is "I tried to match Peter's implementation
as closely as possible." Regarding the patch applying, it worked for
me... see below.
>
> On Fri, Jul 4, 2008 at 12:41 AM, Darren Hart <dvhltc@us.ibm.com> wrote:
> > Enqueue deprioritized RT tasks to head of prio array
> >
> > This patch backports Peter Z's enqueue to head of prio array on
> > de-prioritization to 2.6.24.7-rt14 which doesn't have the
> > enqueue_rt_entity and associated changes.
> >
> > I've run several long running real-time java benchmarks and it's
> > holding so far. Steven, please consider this patch for inclusion
> > in the next 2.6.24.7-rtX release.
> >
> > Peter, I didn't include your Signed-off-by as only about half your
> > original patch applied to 2.6.24.7-rt14. If you're happy with this
> > version, would you also sign off?
> >
> > Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
> >
> >
> > ---
> > Index: linux-2.6.24.7-ibmrt2.6-view/include/linux/sched.h
> > ===================================================================
> > --- linux-2.6.24.7-ibmrt2.6-view.orig/include/linux/sched.h
> > +++ linux-2.6.24.7-ibmrt2.6-view/include/linux/sched.h
> > @@ -897,11 +897,16 @@ struct uts_namespace;
> > struct rq;
> > struct sched_domain;
> >
> > +#define ENQUEUE_WAKEUP 0x01
> > +#define ENQUEUE_HEAD 0x02
> > +
> > +#define DEQUEUE_SLEEP 0x01
> > +
>
> Question: is ENQUEUE_WAKEUP equal to DEQUEUE_SLEEP by design or
> coincidence?
Coincidence. The ENQUEUE_* flags are only to be used with the
enqueue_task* methods, while the DEQUEUE_* flags are for dequeue_task*.
Note that the conversion of sleep to the DEQUEUE_SLEEP flag isn't really
necessary as there is only the one flag, but it makes the calls
parallel, which I suspect was Peter's intention (but I speculate here).
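To make the separation concrete, here is a minimal sketch of the
intended usage (the flag values and both call sites are taken from the
patch; only the juxtaposition is mine):

	#define ENQUEUE_WAKEUP	0x01	/* enqueue_task*() namespace only */
	#define ENQUEUE_HEAD	0x02	/* enqueue_task*() namespace only */

	#define DEQUEUE_SLEEP	0x01	/* dequeue_task*() namespace only */

	/* wakeup path (try_to_wake_up) */
	activate_task(rq, p, ENQUEUE_WAKEUP);

	/* schedule-out path (__schedule) */
	deactivate_task(rq, prev, DEQUEUE_SLEEP);

The two 0x01 values never meet in the same argument, so the numeric
overlap between the namespaces is harmless.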
> The renaming of wakeup and sleep to flags makes it at
> least superficially seem like they overlap. Since a large part of the
> patch is renaming, it might be easier to understand if the renaming
> was done as a separate patch, but on the other hand, that is probably
> just a PITA. :)
Seems a small enough patch to be all in one to me. If others object
I'll split it out, but again, I tried to keep the backport as close to
Peter's original patch as possible.
>
> > struct sched_class {
> > const struct sched_class *next;
> >
> > - void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
> > - void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
> > + void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
> > + void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
> > void (*yield_task) (struct rq *rq);
> > int (*select_task_rq)(struct task_struct *p, int sync);
> >
> > Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched.c
> > ===================================================================
> > --- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched.c
> > +++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched.c
> > @@ -1046,7 +1046,7 @@ static const u32 prio_to_wmult[40] = {
> > /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
> > };
> >
> > -static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
> > +static void activate_task(struct rq *rq, struct task_struct *p, int flags);
> >
> > /*
> > * runqueue iterator, to support SMP load-balancing between different
> > @@ -1155,16 +1155,16 @@ static void set_load_weight(struct task_
> > p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
> > }
> >
> > -static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
> > +static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> > {
> > sched_info_queued(p);
> > - p->sched_class->enqueue_task(rq, p, wakeup);
> > + p->sched_class->enqueue_task(rq, p, flags);
> > p->se.on_rq = 1;
> > }
> >
> > -static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
> > +static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
> > {
> > - p->sched_class->dequeue_task(rq, p, sleep);
> > + p->sched_class->dequeue_task(rq, p, flags);
> > p->se.on_rq = 0;
> > }
> >
> > @@ -1219,26 +1219,26 @@ static int effective_prio(struct task_st
> > /*
> > * activate_task - move a task to the runqueue.
> > */
> > -static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
> > +static void activate_task(struct rq *rq, struct task_struct *p, int flags)
> > {
> > if (p->state == TASK_UNINTERRUPTIBLE)
> > rq->nr_uninterruptible--;
> >
> > ftrace_event_task_activate(p, cpu_of(rq));
> > - enqueue_task(rq, p, wakeup);
> > + enqueue_task(rq, p, flags);
> > inc_nr_running(p, rq);
> > }
> >
> > /*
> > * deactivate_task - remove a task from the runqueue.
> > */
> > -static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
> > +static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
> > {
> > if (p->state == TASK_UNINTERRUPTIBLE)
> > rq->nr_uninterruptible++;
> >
> > ftrace_event_task_deactivate(p, cpu_of(rq));
> > - dequeue_task(rq, p, sleep);
> > + dequeue_task(rq, p, flags);
> > dec_nr_running(p, rq);
> > }
> >
> > @@ -1759,7 +1759,7 @@ out_activate:
> > else
> > schedstat_inc(p, se.nr_wakeups_remote);
> > update_rq_clock(rq);
> > - activate_task(rq, p, 1);
> > + activate_task(rq, p, ENQUEUE_WAKEUP);
> > check_preempt_curr(rq, p);
> > success = 1;
> >
> > @@ -3968,7 +3968,7 @@ asmlinkage void __sched __schedule(void)
> > prev->state = TASK_RUNNING;
> > } else {
> > touch_softlockup_watchdog();
> > - deactivate_task(rq, prev, 1);
> > + deactivate_task(rq, prev, DEQUEUE_SLEEP);
> > }
> > switch_count = &prev->nvcsw;
> > }
> > @@ -4431,7 +4431,7 @@ EXPORT_SYMBOL(sleep_on_timeout);
> > void task_setprio(struct task_struct *p, int prio)
> > {
> > unsigned long flags;
> > - int oldprio, prev_resched, on_rq, running;
> > + int oldprio, prev_resched, on_rq, running, down;
> > struct rq *rq;
> > const struct sched_class *prev_class = p->sched_class;
> >
> > @@ -4472,6 +4472,7 @@ void task_setprio(struct task_struct *p,
> > else
> > p->sched_class = &fair_sched_class;
> >
> > + down = (prio > p->prio) ? ENQUEUE_HEAD : 0;
> > p->prio = prio;
> >
> > // trace_special_pid(p->pid, __PRIO(oldprio), PRIO(p));
> > @@ -4480,7 +4481,7 @@ void task_setprio(struct task_struct *p,
> > if (running)
> > p->sched_class->set_curr_task(rq);
> > if (on_rq) {
> > - enqueue_task(rq, p, 0);
> > + enqueue_task(rq, p, down);
> > check_class_changed(rq, p, prev_class, oldprio, running);
> > }
> > // trace_special(prev_resched, _need_resched(), 0);
> > Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched_fair.c
> > ===================================================================
> > --- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched_fair.c
> > +++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched_fair.c
> > @@ -756,10 +756,11 @@ static inline struct sched_entity *paren
> > * increased. Here we update the fair scheduling stats and
> > * then put the task into the rbtree:
> > */
> > -static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
> > +static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > {
> > struct cfs_rq *cfs_rq;
> > struct sched_entity *se = &p->se;
> > + int wakeup = flags & ENQUEUE_WAKEUP;
>
> Minor nit: was it necessary to create a new int? Why not just flags &=
> ENQUEUE_WAKEUP, plus subsequent renaming where necessary?
>
>
Well... I've copied the entire function here for context:
static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
	struct cfs_rq *cfs_rq;
	struct sched_entity *se = &p->se;
	int wakeup = flags & ENQUEUE_WAKEUP;

	for_each_sched_entity(se) {
		if (se->on_rq)
			break;
		cfs_rq = cfs_rq_of(se);
		enqueue_entity(cfs_rq, se, wakeup);
		wakeup = 1;
	}
}
Note that "wakeup = 1;" for all but the initial entity which uses the
flag that was passed in. So if this is correct behavior, then the new
integer seems like a reasonable approach to me. Note that the
dequeue_task_fair has a parallel implementation.
Peter, can you explain why only the first entity is subject to the value
of the passed-in flags in these two functions? I understand this was
the original behavior as well.
> >
> > for_each_sched_entity(se) {
> > if (se->on_rq)
> > @@ -775,10 +776,11 @@ static void enqueue_task_fair(struct rq
> > * decreased. We remove the task from the rbtree and
> > * update the fair scheduling stats:
> > */
> > -static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
> > +static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > {
> > struct cfs_rq *cfs_rq;
> > struct sched_entity *se = &p->se;
> > + int sleep = flags & DEQUEUE_SLEEP;
> >
> > for_each_sched_entity(se) {
> > cfs_rq = cfs_rq_of(se);
> > Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched_idletask.c
> > ===================================================================
> > --- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched_idletask.c
> > +++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched_idletask.c
> > @@ -31,7 +31,7 @@ static struct task_struct *pick_next_tas
> > * message if some code attempts to do it:
> > */
> > static void
> > -dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
> > +dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
> > {
> > spin_unlock_irq(&rq->lock);
> > printk(KERN_ERR "bad: scheduling from the idle thread!\n");
> > Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched_rt.c
> > ===================================================================
> > --- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched_rt.c
> > +++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched_rt.c
> > @@ -181,11 +181,16 @@ unsigned long rt_nr_uninterruptible_cpu(
> > return cpu_rq(cpu)->rt.rt_nr_uninterruptible;
> > }
> >
> > -static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
> > +static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
> > {
> > struct rt_prio_array *array = &rq->rt.active;
> >
> > - list_add_tail(&p->run_list, array->queue + p->prio);
> > +
> > + if (unlikely(flags & ENQUEUE_HEAD))
> > + list_add(&p->run_list, array->queue + p->prio);
> > + else
> > + list_add_tail(&p->run_list, array->queue + p->prio);
> > +
> > __set_bit(p->prio, array->bitmap);
> > inc_rt_tasks(p, rq);
> >
> > @@ -196,7 +201,7 @@ static void enqueue_task_rt(struct rq *r
> > /*
> > * Adding/removing a task to/from a priority array:
> > */
> > -static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
> > +static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
> > {
> > struct rt_prio_array *array = &rq->rt.active;
> >
> > @@ -306,7 +311,7 @@ static void put_prev_task_rt(struct rq *
> > #define RT_MAX_TRIES 3
> >
> > static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
> > -static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
> > +static void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
> >
> > static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
> > {
> >
> >
>
> Lastly, this patch didn't apply cleanly for me! It is very possible
> that I'm just doing something stupid, but here is what happened when I
> tried.
> tar xjf ../kernel/linux-2.6.24.tar.bz2
> cd linux-2.6.24
> bunzip2 -c ../../kernel/patch-2.6.24.7.bz2 | patch -p1
> bunzip2 -c ../patch-2.6.24.7-rt14.bz2 | patch -p1
>
> patch -p1 < ../dvhart.patch
Hrm.. well, I suspect a problem with saving off my patch from the list.
I saved it from the list, removed the email headers and footer and then
used ketchup and quilt to apply it:
$ ketchup -d 2.6.24.7-rt14 -G 2.6.24.7-rt14
$ cd 2.6.24.7-rt14
$ mkdir patches
$ quilt import ~/Desktop/backport.patch
$ quilt push
Applying patch backport.patch
patching file include/linux/sched.h
patching file kernel/sched.c
patching file kernel/sched_fair.c
Hunk #1 succeeded at 760 (offset 4 lines).
Hunk #2 succeeded at 780 (offset 4 lines).
patching file kernel/sched_idletask.c
patching file kernel/sched_rt.c
Now at patch backport.patch
Give it another shot, and let me know if you still run into problems.
I've attached a refreshed patch to account for the "offset 4" messages
above.
Thanks,
Darren
> patching file include/linux/sched.h
> Hunk #1 FAILED at 897.
> 1 out of 1 hunk FAILED -- saving rejects to file include/linux/sched.h.rej
> patching file kernel/sched.c
> Hunk #1 succeeded at 1046 with fuzz 1.
> Hunk #2 FAILED at 1155.
> Hunk #3 FAILED at 1219.
> Hunk #4 FAILED at 1759.
> Hunk #5 FAILED at 3968.
> Hunk #6 FAILED at 4431.
> Hunk #7 FAILED at 4472.
> Hunk #8 FAILED at 4481.
> 7 out of 8 hunks FAILED -- saving rejects to file kernel/sched.c.rej
> patching file kernel/sched_fair.c
> Hunk #1 FAILED at 756.
> Hunk #2 FAILED at 776.
> 2 out of 2 hunks FAILED -- saving rejects to file kernel/sched_fair.c.rej
> patching file kernel/sched_idletask.c
> Hunk #1 succeeded at 31 with fuzz 2.
> patching file kernel/sched_rt.c
> Hunk #1 FAILED at 181.
> Hunk #2 FAILED at 201.
> 2 out of 3 hunks FAILED -- saving rejects to file kernel/sched_rt.c.rej
>
> Thanks
> John
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Darren Hart
Real-Time Linux Team Lead
IBM Linux Technology Center
[-- Attachment #2: backport.patch --]
[-- Type: text/x-patch, Size: 7938 bytes --]
Enqueue deprioritized RT tasks to head of prio array
This patch backports Peter Z's enqueue to head of prio array on
de-prioritization to 2.6.24.7-rt14 which doesn't have the
enqueue_rt_entity and associated changes.
I've run several long running real-time java benchmarks and it's
holding so far. Steven, please consider this patch for inclusion
in the next 2.6.24.7-rtX release.
Peter, I didn't include your Signed-off-by as only about half your
original patch applied to 2.6.24.7-rt14. If you're happy with this
version, would you also sign off?
Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
---
Index: 2.6.24.7-rt14/include/linux/sched.h
===================================================================
--- 2.6.24.7-rt14.orig/include/linux/sched.h 2008-07-05 07:52:45.000000000 -0700
+++ 2.6.24.7-rt14/include/linux/sched.h 2008-07-05 07:54:54.000000000 -0700
@@ -897,11 +897,16 @@
struct rq;
struct sched_domain;
+#define ENQUEUE_WAKEUP 0x01
+#define ENQUEUE_HEAD 0x02
+
+#define DEQUEUE_SLEEP 0x01
+
struct sched_class {
const struct sched_class *next;
- void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
- void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
+ void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
+ void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*yield_task) (struct rq *rq);
int (*select_task_rq)(struct task_struct *p, int sync);
Index: 2.6.24.7-rt14/kernel/sched.c
===================================================================
--- 2.6.24.7-rt14.orig/kernel/sched.c 2008-07-05 07:52:45.000000000 -0700
+++ 2.6.24.7-rt14/kernel/sched.c 2008-07-05 07:54:54.000000000 -0700
@@ -1046,7 +1046,7 @@
/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};
-static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
+static void activate_task(struct rq *rq, struct task_struct *p, int flags);
/*
* runqueue iterator, to support SMP load-balancing between different
@@ -1155,16 +1155,16 @@
p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
}
-static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
+static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
{
sched_info_queued(p);
- p->sched_class->enqueue_task(rq, p, wakeup);
+ p->sched_class->enqueue_task(rq, p, flags);
p->se.on_rq = 1;
}
-static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
+static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
{
- p->sched_class->dequeue_task(rq, p, sleep);
+ p->sched_class->dequeue_task(rq, p, flags);
p->se.on_rq = 0;
}
@@ -1219,26 +1219,26 @@
/*
* activate_task - move a task to the runqueue.
*/
-static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
+static void activate_task(struct rq *rq, struct task_struct *p, int flags)
{
if (p->state == TASK_UNINTERRUPTIBLE)
rq->nr_uninterruptible--;
ftrace_event_task_activate(p, cpu_of(rq));
- enqueue_task(rq, p, wakeup);
+ enqueue_task(rq, p, flags);
inc_nr_running(p, rq);
}
/*
* deactivate_task - remove a task from the runqueue.
*/
-static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
+static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
{
if (p->state == TASK_UNINTERRUPTIBLE)
rq->nr_uninterruptible++;
ftrace_event_task_deactivate(p, cpu_of(rq));
- dequeue_task(rq, p, sleep);
+ dequeue_task(rq, p, flags);
dec_nr_running(p, rq);
}
@@ -1759,7 +1759,7 @@
else
schedstat_inc(p, se.nr_wakeups_remote);
update_rq_clock(rq);
- activate_task(rq, p, 1);
+ activate_task(rq, p, ENQUEUE_WAKEUP);
check_preempt_curr(rq, p);
success = 1;
@@ -3968,7 +3968,7 @@
prev->state = TASK_RUNNING;
} else {
touch_softlockup_watchdog();
- deactivate_task(rq, prev, 1);
+ deactivate_task(rq, prev, DEQUEUE_SLEEP);
}
switch_count = &prev->nvcsw;
}
@@ -4431,7 +4431,7 @@
void task_setprio(struct task_struct *p, int prio)
{
unsigned long flags;
- int oldprio, prev_resched, on_rq, running;
+ int oldprio, prev_resched, on_rq, running, down;
struct rq *rq;
const struct sched_class *prev_class = p->sched_class;
@@ -4472,6 +4472,7 @@
else
p->sched_class = &fair_sched_class;
+ down = (prio > p->prio) ? ENQUEUE_HEAD : 0;
p->prio = prio;
// trace_special_pid(p->pid, __PRIO(oldprio), PRIO(p));
@@ -4480,7 +4481,7 @@
if (running)
p->sched_class->set_curr_task(rq);
if (on_rq) {
- enqueue_task(rq, p, 0);
+ enqueue_task(rq, p, down);
check_class_changed(rq, p, prev_class, oldprio, running);
}
// trace_special(prev_resched, _need_resched(), 0);
Index: 2.6.24.7-rt14/kernel/sched_fair.c
===================================================================
--- 2.6.24.7-rt14.orig/kernel/sched_fair.c 2008-07-05 07:52:45.000000000 -0700
+++ 2.6.24.7-rt14/kernel/sched_fair.c 2008-07-05 07:54:54.000000000 -0700
@@ -760,10 +760,11 @@
* increased. Here we update the fair scheduling stats and
* then put the task into the rbtree:
*/
-static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
+static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
+ int wakeup = flags & ENQUEUE_WAKEUP;
for_each_sched_entity(se) {
if (se->on_rq)
@@ -779,10 +780,11 @@
* decreased. We remove the task from the rbtree and
* update the fair scheduling stats:
*/
-static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
+static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
+ int sleep = flags & DEQUEUE_SLEEP;
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
Index: 2.6.24.7-rt14/kernel/sched_idletask.c
===================================================================
--- 2.6.24.7-rt14.orig/kernel/sched_idletask.c 2008-07-05 07:52:45.000000000 -0700
+++ 2.6.24.7-rt14/kernel/sched_idletask.c 2008-07-05 07:54:54.000000000 -0700
@@ -31,7 +31,7 @@
* message if some code attempts to do it:
*/
static void
-dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
+dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
{
spin_unlock_irq(&rq->lock);
printk(KERN_ERR "bad: scheduling from the idle thread!\n");
Index: 2.6.24.7-rt14/kernel/sched_rt.c
===================================================================
--- 2.6.24.7-rt14.orig/kernel/sched_rt.c 2008-07-05 07:52:45.000000000 -0700
+++ 2.6.24.7-rt14/kernel/sched_rt.c 2008-07-05 07:54:54.000000000 -0700
@@ -181,11 +181,16 @@
return cpu_rq(cpu)->rt.rt_nr_uninterruptible;
}
-static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
+static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
struct rt_prio_array *array = &rq->rt.active;
- list_add_tail(&p->run_list, array->queue + p->prio);
+
+ if (unlikely(flags & ENQUEUE_HEAD))
+ list_add(&p->run_list, array->queue + p->prio);
+ else
+ list_add_tail(&p->run_list, array->queue + p->prio);
+
__set_bit(p->prio, array->bitmap);
inc_rt_tasks(p, rq);
@@ -196,7 +201,7 @@
/*
* Adding/removing a task to/from a priority array:
*/
-static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
+static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
struct rt_prio_array *array = &rq->rt.active;
@@ -306,7 +311,7 @@
#define RT_MAX_TRIES 3
static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
-static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
+static void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC][PATCH] fix SCHED_FIFO spec violation (backport)
2008-07-05 15:18 ` Darren Hart
@ 2008-07-07 11:00 ` John Kacur
2008-07-07 15:24 ` Darren Hart
2008-07-15 8:03 ` Peter Zijlstra
1 sibling, 1 reply; 8+ messages in thread
From: John Kacur @ 2008-07-07 11:00 UTC (permalink / raw)
To: Darren Hart
Cc: linux-rt-users, Steven Rostedt, Peter Zijlstra, Clark Williams,
Ingo Molnar, Thomas Gleixner, Dmitry Adamushko, Gregory Haskins
[-- Attachment #1: Type: text/plain, Size: 8039 bytes --]
On Sat, Jul 5, 2008 at 5:18 PM, Darren Hart <dvhltc@us.ibm.com> wrote:
> On Fri, 2008-07-04 at 15:08 +0200, John Kacur wrote:
>> Since we're always being admonished to do code-review, and I needed to
>> waste some time, here are a few comments on what looks largely like a
>> nice patch to me.
>
> Hi John, thanks for the review. Note that this is a backport of Peter's
> git patch, so I'll try to address some of your rationale questions
> below, but the short answer is "I tried to match Peter's implementation
> as closely as possible." Regarding the patch applying, it worked for
> me... see below.
>
>>
>> On Fri, Jul 4, 2008 at 12:41 AM, Darren Hart <dvhltc@us.ibm.com> wrote:
>> > Enqueue deprioritized RT tasks to head of prio array
>> >
>> > This patch backports Peter Z's enqueue to head of prio array on
>> > de-prioritization to 2.6.24.7-rt14 which doesn't have the
>> > enqueue_rt_entity and associated changes.
>> >
>> > I've run several long running real-time java benchmarks and it's
>> > holding so far. Steven, please consider this patch for inclusion
>> > in the next 2.6.24.7-rtX release.
>> >
>> > Peter, I didn't include your Signed-off-by as only about half your
>> > original patch applied to 2.6.24.7-rt14. If you're happy with this
>> > version, would you also sign off?
>> >
>> > Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
>> >
>> >
>> > ---
>> > Index: linux-2.6.24.7-ibmrt2.6-view/include/linux/sched.h
>> > ===================================================================
>> > --- linux-2.6.24.7-ibmrt2.6-view.orig/include/linux/sched.h
>> > +++ linux-2.6.24.7-ibmrt2.6-view/include/linux/sched.h
>> > @@ -897,11 +897,16 @@ struct uts_namespace;
>> > struct rq;
>> > struct sched_domain;
>> >
>> > +#define ENQUEUE_WAKEUP 0x01
>> > +#define ENQUEUE_HEAD 0x02
>> > +
>> > +#define DEQUEUE_SLEEP 0x01
>> > +
>>
>> Question: is ENQUEUE_WAKEUP equal to DEQUEUE_SLEEP by design or
>> coincidence?
>
> Coincidence. The ENQUEUE_* flags are only to be used with the
> enqueue_task* methods, while the DEQUEUE_* flags are for dequeue_task*.
> Note that the conversion of sleep to the DEQUEUE_SLEEP flag isn't really
> necessary as there is only the one flag, but it makes the calls
> parallel, which I suspect was Peter's intention (but I speculate here).
>
>> The renaming of wakeup and sleep to flags makes it at
>> least superficially seem like they overlap. Since a large part of the
>> patch is renaming, it might be easier to understand if the renaming
>> was done as a separate patch, but on the other hand, that is probably
>> just a PITA. :)
>
> Seems a small enough patch to be all in one to me. If others object
> I'll split it out, but again, I tried to keep the backport as close to
> Peter's original patch as possible.
----SNIP-----
I'm not sure the renaming is necessary at all, since "wakeup" and
"sleep" seem more descriptive than "flags". If you skip the renaming
then the size of the patch is reduced to half its current size.
>> Minor nit: was it necessary to create a new int? Why not just flags &=
>> ENQUEUE_WAKEUP, plus subsequent renaming where necessary?
>>
>>
>
>
> Well... I've copied the entire function here for context:
>
> static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> {
> 	struct cfs_rq *cfs_rq;
> 	struct sched_entity *se = &p->se;
> 	int wakeup = flags & ENQUEUE_WAKEUP;
>
> 	for_each_sched_entity(se) {
> 		if (se->on_rq)
> 			break;
> 		cfs_rq = cfs_rq_of(se);
> 		enqueue_entity(cfs_rq, se, wakeup);
> 		wakeup = 1;
> 	}
> }
>
> Note that "wakeup = 1;" for all but the initial entity which uses the
> flag that was passed in. So if this is correct behavior, then the new
> integer seems like a reasonable approach to me. Note that the
> dequeue_task_fair has a parallel implementation.
>
> Peter, can you explain why only the first entity is subject to the value
> of the passed-in flags in these two functions? I understand this was
> the original behavior as well.
I wonder if the only reason the extra int was created was just as a
quick way to deal with the renaming of the argument but not changing
the code in the function. If we skip the renmaing once again, then it
becomes slightly simpler.
Ok, so Peter Z did the hard work of creating the original patch, and
Darren Hart did the hard work of backporting it; I am doing the
relatively simple task of refactoring it, which reduces Darren's patch
in size by at least half. Maybe this alternate patch would be more
readily acceptable to Steve et al? OTOH, note that Darren extensively
tested his version of the patch, and I just did a compile and boot
test.
Including both an inlined version and an attachment until I can figure out how
to avoid line wrap in gmail.
Signed-off-by: John Kacur <jkacur@gmail.com>
Index: linux-2.6.24.7-rt14/include/linux/sched.h
===================================================================
--- linux-2.6.24.7-rt14.orig/include/linux/sched.h
+++ linux-2.6.24.7-rt14/include/linux/sched.h
@@ -897,6 +897,11 @@ struct uts_namespace;
struct rq;
struct sched_domain;
+#define ENQUEUE_WAKEUP 0x01
+#define ENQUEUE_HEAD 0x02
+
+#define DEQUEUE_SLEEP 0x01
+
struct sched_class {
const struct sched_class *next;
Index: linux-2.6.24.7-rt14/kernel/sched.c
===================================================================
--- linux-2.6.24.7-rt14.orig/kernel/sched.c
+++ linux-2.6.24.7-rt14/kernel/sched.c
@@ -1759,7 +1759,7 @@ out_activate:
else
schedstat_inc(p, se.nr_wakeups_remote);
update_rq_clock(rq);
- activate_task(rq, p, 1);
+ activate_task(rq, p, ENQUEUE_WAKEUP);
check_preempt_curr(rq, p);
success = 1;
@@ -3968,7 +3968,7 @@ asmlinkage void __sched __schedule(void)
prev->state = TASK_RUNNING;
} else {
touch_softlockup_watchdog();
- deactivate_task(rq, prev, 1);
+ deactivate_task(rq, prev, DEQUEUE_SLEEP);
}
switch_count = &prev->nvcsw;
}
@@ -4431,7 +4431,7 @@ EXPORT_SYMBOL(sleep_on_timeout);
void task_setprio(struct task_struct *p, int prio)
{
unsigned long flags;
- int oldprio, prev_resched, on_rq, running;
+ int oldprio, prev_resched, on_rq, running, down;
struct rq *rq;
const struct sched_class *prev_class = p->sched_class;
@@ -4472,6 +4472,7 @@ void task_setprio(struct task_struct *p,
else
p->sched_class = &fair_sched_class;
+ down = (prio > p->prio) ? ENQUEUE_HEAD : 0;
p->prio = prio;
// trace_special_pid(p->pid, __PRIO(oldprio), PRIO(p));
@@ -4480,7 +4481,7 @@ void task_setprio(struct task_struct *p,
if (running)
p->sched_class->set_curr_task(rq);
if (on_rq) {
- enqueue_task(rq, p, 0);
+ enqueue_task(rq, p, down);
check_class_changed(rq, p, prev_class, oldprio, running);
}
// trace_special(prev_resched, _need_resched(), 0);
Index: linux-2.6.24.7-rt14/kernel/sched_fair.c
===================================================================
--- linux-2.6.24.7-rt14.orig/kernel/sched_fair.c
+++ linux-2.6.24.7-rt14/kernel/sched_fair.c
@@ -764,6 +764,7 @@ static void enqueue_task_fair(struct rq
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
+ wakeup &= ENQUEUE_WAKEUP;
for_each_sched_entity(se) {
if (se->on_rq)
@@ -783,6 +784,7 @@ static void dequeue_task_fair(struct rq
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
+ sleep &= DEQUEUE_SLEEP;
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
Index: linux-2.6.24.7-rt14/kernel/sched_rt.c
===================================================================
--- linux-2.6.24.7-rt14.orig/kernel/sched_rt.c
+++ linux-2.6.24.7-rt14/kernel/sched_rt.c
@@ -185,7 +185,11 @@ static void enqueue_task_rt(struct rq *r
{
struct rt_prio_array *array = &rq->rt.active;
- list_add_tail(&p->run_list, array->queue + p->prio);
+ if (unlikely(wakeup & ENQUEUE_HEAD))
+ list_add(&p->run_list, array->queue + p->prio);
+ else
+ list_add_tail(&p->run_list, array->queue + p->prio);
+
__set_bit(p->prio, array->bitmap);
inc_rt_tasks(p, rq);
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: pandd.patch --]
[-- Type: text/x-patch; name=pandd.patch, Size: 3215 bytes --]
Signed-off-by: John Kacur <jkacur@gmail.com>
Index: linux-2.6.24.7-rt14/include/linux/sched.h
===================================================================
--- linux-2.6.24.7-rt14.orig/include/linux/sched.h
+++ linux-2.6.24.7-rt14/include/linux/sched.h
@@ -897,6 +897,11 @@ struct uts_namespace;
struct rq;
struct sched_domain;
+#define ENQUEUE_WAKEUP 0x01
+#define ENQUEUE_HEAD 0x02
+
+#define DEQUEUE_SLEEP 0x01
+
struct sched_class {
const struct sched_class *next;
Index: linux-2.6.24.7-rt14/kernel/sched.c
===================================================================
--- linux-2.6.24.7-rt14.orig/kernel/sched.c
+++ linux-2.6.24.7-rt14/kernel/sched.c
@@ -1759,7 +1759,7 @@ out_activate:
else
schedstat_inc(p, se.nr_wakeups_remote);
update_rq_clock(rq);
- activate_task(rq, p, 1);
+ activate_task(rq, p, ENQUEUE_WAKEUP);
check_preempt_curr(rq, p);
success = 1;
@@ -3968,7 +3968,7 @@ asmlinkage void __sched __schedule(void)
prev->state = TASK_RUNNING;
} else {
touch_softlockup_watchdog();
- deactivate_task(rq, prev, 1);
+ deactivate_task(rq, prev, DEQUEUE_SLEEP);
}
switch_count = &prev->nvcsw;
}
@@ -4431,7 +4431,7 @@ EXPORT_SYMBOL(sleep_on_timeout);
void task_setprio(struct task_struct *p, int prio)
{
unsigned long flags;
- int oldprio, prev_resched, on_rq, running;
+ int oldprio, prev_resched, on_rq, running, down;
struct rq *rq;
const struct sched_class *prev_class = p->sched_class;
@@ -4472,6 +4472,7 @@ void task_setprio(struct task_struct *p,
else
p->sched_class = &fair_sched_class;
+ down = (prio > p->prio) ? ENQUEUE_HEAD : 0;
p->prio = prio;
// trace_special_pid(p->pid, __PRIO(oldprio), PRIO(p));
@@ -4480,7 +4481,7 @@ void task_setprio(struct task_struct *p,
if (running)
p->sched_class->set_curr_task(rq);
if (on_rq) {
- enqueue_task(rq, p, 0);
+ enqueue_task(rq, p, down);
check_class_changed(rq, p, prev_class, oldprio, running);
}
// trace_special(prev_resched, _need_resched(), 0);
Index: linux-2.6.24.7-rt14/kernel/sched_fair.c
===================================================================
--- linux-2.6.24.7-rt14.orig/kernel/sched_fair.c
+++ linux-2.6.24.7-rt14/kernel/sched_fair.c
@@ -764,6 +764,7 @@ static void enqueue_task_fair(struct rq
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
+ wakeup &= ENQUEUE_WAKEUP;
for_each_sched_entity(se) {
if (se->on_rq)
@@ -783,6 +784,7 @@ static void dequeue_task_fair(struct rq
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
+ sleep &= DEQUEUE_SLEEP;
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
Index: linux-2.6.24.7-rt14/kernel/sched_rt.c
===================================================================
--- linux-2.6.24.7-rt14.orig/kernel/sched_rt.c
+++ linux-2.6.24.7-rt14/kernel/sched_rt.c
@@ -185,7 +185,11 @@ static void enqueue_task_rt(struct rq *r
{
struct rt_prio_array *array = &rq->rt.active;
- list_add_tail(&p->run_list, array->queue + p->prio);
+ if (unlikely(wakeup & ENQUEUE_HEAD))
+ list_add(&p->run_list, array->queue + p->prio);
+ else
+ list_add_tail(&p->run_list, array->queue + p->prio);
+
__set_bit(p->prio, array->bitmap);
inc_rt_tasks(p, rq);
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [RFC][PATCH] fix SCHED_FIFO spec violation (backport)
2008-07-07 11:00 ` John Kacur
@ 2008-07-07 15:24 ` Darren Hart
0 siblings, 0 replies; 8+ messages in thread
From: Darren Hart @ 2008-07-07 15:24 UTC (permalink / raw)
To: John Kacur
Cc: linux-rt-users, Steven Rostedt, Peter Zijlstra, Clark Williams,
Ingo Molnar, Thomas Gleixner, Dmitry Adamushko, Gregory Haskins
On Mon, 2008-07-07 at 13:00 +0200, John Kacur wrote:
> On Sat, Jul 5, 2008 at 5:18 PM, Darren Hart <dvhltc@us.ibm.com> wrote:
> > On Fri, 2008-07-04 at 15:08 +0200, John Kacur wrote:
> >> Since we're always being admonished to do code-review, and I needed to
> >> waste some time, here are a few comments, to what looks largely like a
> >> nice patch to me.
> >
> > Hi John, thanks for the review. Note that this is a backport of Peter's
> > git patch, so I'll try to address some of your rationale questions
> > below, but the short answer is "I tried to match Peter's implementation
> > as closely as possible." Regarding the patch applying, it worked for
> > me... see below.
> >
> >>
> >> On Fri, Jul 4, 2008 at 12:41 AM, Darren Hart <dvhltc@us.ibm.com> wrote:
> >> > Enqueue deprioritized RT tasks to head of prio array
> >> >
> >> > This patch backports Peter Z's enqueue to head of prio array on
> >> > de-prioritization to 2.6.24.7-rt14 which doesn't have the
> >> > enqueue_rt_entity and associated changes.
> >> >
> >> > I've run several long running real-time java benchmarks and it's
> >> > holding so far. Steven, please consider this patch for inclusion
> >> > in the next 2.6.24.7-rtX release.
> >> >
> >> > Peter, I didn't include your Signed-off-by as only about half your
> >> > original patch applied to 2.6.24.7-rt14. If you're happy with this
> >> > version, would you also sign off?
> >> >
> >> > Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
> >> >
> >> >
> >> > ---
> >> > Index: linux-2.6.24.7-ibmrt2.6-view/include/linux/sched.h
> >> > ===================================================================
> >> > --- linux-2.6.24.7-ibmrt2.6-view.orig/include/linux/sched.h
> >> > +++ linux-2.6.24.7-ibmrt2.6-view/include/linux/sched.h
> >> > @@ -897,11 +897,16 @@ struct uts_namespace;
> >> > struct rq;
> >> > struct sched_domain;
> >> >
> >> > +#define ENQUEUE_WAKEUP 0x01
> >> > +#define ENQUEUE_HEAD 0x02
> >> > +
> >> > +#define DEQUEUE_SLEEP 0x01
> >> > +
> >>
> >> Question: is ENQUEUE_WAKEUP equal to DEQUEUE_SLEEP by design or
> >> coincidence?
> >
> > Coincidence. The ENQUEUE_* flags are only to be used with the
> > enqueue_task* methods, while the DEQUEUE_* flags are for dequeue_task*.
> > Note that the conversion of sleep to the DEQUEUE_SLEEP flag isn't really
> > necessary as there is only the one flag, but it makes the calls
> > parallel, which I suspect was Peter's intention (but I speculate here).
> >
> >> The renaming of wakeup and sleep to flags makes it at
> >> least superficially seem like they overlap. Since a large part of the
> >> patch is renaming, it might be easier to understand if the renaming
> >> was done as a separate patch, but on the other hand, that is probably
> >> just a PITA. :)
> >
> > Seems a small enough patch to be all in one to me. If others object
> > I'll split it out, but again, I tried to keep the backport as close to
> > Peter's original patch as possible.
> ----SNIP-----
> I'm not sure the renaming is necessary at all, since "wakeup" and
> "sleep" seem more descriptive than "flags". If you skip the renaming
> then the size of the patch is reduced to half its current size.
The renaming isn't strictly necessary, but the name "flags" denotes that
there are multiple values that can be passed, whereas "wakeup" is binary -
either on or off. Since ENQUEUE_HEAD has nothing to do with "wakeup", it
doesn't make sense to reuse the old name. Changing the dequeue*
routines makes the related calls parallel in signature and allows for
similar changes in the future.
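To make that concrete, here's a standalone toy (plain userspace C, not
kernel code) reusing the patch's flag values. Both conditions can be
requested in one argument, which a single binary "wakeup" could never
express:

#include <stdio.h>

/* Toy sketch only: the patch's flag values, outside the kernel. */
#define ENQUEUE_WAKEUP	0x01
#define ENQUEUE_HEAD	0x02

static void enqueue_sketch(int flags)
{
	if (flags & ENQUEUE_HEAD)
		printf("queue at head of prio list\n");
	else
		printf("queue at tail of prio list\n");

	if (flags & ENQUEUE_WAKEUP)
		printf("enqueue is part of a wakeup\n");
}

int main(void)
{
	/* Two independent conditions carried in one argument. */
	enqueue_sketch(ENQUEUE_WAKEUP | ENQUEUE_HEAD);
	return 0;
}

(The backport itself never combines the two -- activate_task() passes
ENQUEUE_WAKEUP and task_setprio() passes ENQUEUE_HEAD or 0 -- but the
flags form leaves room for it.)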
Also, as this is a backport, I would rather not try to change it here
only to have it revert to Peter's original in the upstream -rt
tree.
Thanks,
--
Darren Hart
Real-Time Linux Team Lead
IBM Linux Technology Center
* Re: [RFC][PATCH] fix SCHED_FIFO spec violation (backport)
2008-07-05 15:18 ` Darren Hart
2008-07-07 11:00 ` John Kacur
@ 2008-07-15 8:03 ` Peter Zijlstra
2008-07-15 9:19 ` John Kacur
1 sibling, 1 reply; 8+ messages in thread
From: Peter Zijlstra @ 2008-07-15 8:03 UTC (permalink / raw)
To: Darren Hart
Cc: John Kacur, linux-rt-users, Steven Rostedt, Clark Williams,
Ingo Molnar, Thomas Gleixner, Dmitry Adamushko, Gregory Haskins
On Sat, 2008-07-05 at 08:18 -0700, Darren Hart wrote:
> On Fri, 2008-07-04 at 15:08 +0200, John Kacur wrote:
> > On Fri, Jul 4, 2008 at 12:41 AM, Darren Hart <dvhltc@us.ibm.com> wrote:
> > > Enqueue deprioritized RT tasks to head of prio array
> > >
> > > This patch backports Peter Z's enqueue to head of prio array on
> > > de-prioritization to 2.6.24.7-rt14 which doesn't have the
> > > enqueue_rt_entity and associated changes.
> > >
> > > I've run several long running real-time java benchmarks and it's
> > > holding so far. Steven, please consider this patch for inclusion
> > > in the next 2.6.24.7-rtX release.
> > >
> > > Peter, I didn't include your Signed-off-by as only about half your
> > > original patch applied to 2.6.24.7-r14. If you're happy with this
> > > version, would you also sign off?
> > >
> > > Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
> > >
> > >
> > > ---
> > > Index: linux-2.6.24.7-ibmrt2.6-view/include/linux/sched.h
> > > ===================================================================
> > > --- linux-2.6.24.7-ibmrt2.6-view.orig/include/linux/sched.h
> > > +++ linux-2.6.24.7-ibmrt2.6-view/include/linux/sched.h
> > > @@ -897,11 +897,16 @@ struct uts_namespace;
> > > struct rq;
> > > struct sched_domain;
> > >
> > > +#define ENQUEUE_WAKEUP 0x01
> > > +#define ENQUEUE_HEAD 0x02
> > > +
> > > +#define DEQUEUE_SLEEP 0x01
> > > +
> >
> > Question: is ENQUEUE_WAKEUP equal to DEQUEUE_SLEEP by design or
> > coincidence?
>
> Coincidence. The ENQUEUE_* flags are only to be used with the
> enqueue_task* methods, while the DEQUEUE_* flags are for dequeue_task*.
> Note that the conversion of sleep to the DEQUEUE_SLEEP flag isn't really
> necessary as there is only the one flag, but it makes the calls
> parallel, which I suspect was Peter's intention (but I speculate here).
Indeed.
> > The renaming of wakeup and sleep to flags makes it at
> > least superficially seem like they overlap. Since a large part of the
> > patch is renaming, it might be easier to understand if the renaming
> > was done as a separate patch, but on the other hand, that is probably
> > just a PITA. :)
>
> Seems a small enough patch to be all in one to me. If others object
> I'll split it out, but again, I tried to keep the backport as close to
> Peter's original patch as possible.
I just hacked together what was needed to post an RFC and never made it
a 'pretty' series ;-)
One could go to the extra length of making it multiple patches, but it's not
too big, so I'm not sure we should worry about that.
> >
> > > struct sched_class {
> > > const struct sched_class *next;
> > >
> > > - void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
> > > - void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
> > > + void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
> > > + void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
> > > void (*yield_task) (struct rq *rq);
> > > int (*select_task_rq)(struct task_struct *p, int sync);
> > >
> > > Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched_fair.c
> > > ===================================================================
> > > --- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched_fair.c
> > > +++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched_fair.c
> > > @@ -756,10 +756,11 @@ static inline struct sched_entity *paren
> > > * increased. Here we update the fair scheduling stats and
> > > * then put the task into the rbtree:
> > > */
> > > -static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
> > > +static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > > {
> > > struct cfs_rq *cfs_rq;
> > > struct sched_entity *se = &p->se;
> > > + int wakeup = flags & ENQUEUE_WAKEUP;
> >
> > Minor nit: was it necessary to create a new int? Why not just flags &=
> > ENQUEUE_WAKEUP, plus subsequent renaming where necessary?
As Darren already said, the rename of wakeup/sleep to flags is needed
because of the multiple value thing and consistency.
> Well... I've copied the entire function here for context:
>
> static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> {
> struct cfs_rq *cfs_rq;
> struct sched_entity *se = &p->se;
> int wakeup = flags & ENQUEUE_WAKEUP;
>
> for_each_sched_entity(se) {
> if (se->on_rq)
> break;
> cfs_rq = cfs_rq_of(se);
> enqueue_entity(cfs_rq, se, wakeup);
> wakeup = 1;
> }
> }
>
> Note that "wakeup = 1;" for all but the initial entity which uses the
> flag that was passed in. So if this is correct behavior, then the new
> integer seems like a reasonable approach to me. Note that the
> dequeue_task_fair has a parallel implementation.
>
> Peter, can you explain why only the first entity is subject to the value
> of the passed-in flags in these two functions? I understand this was
> the original behavior as well.
group scheduling :-)
So, if you haven't run away by now, I'll continue explaining.
The current group scheduler is a hierarchy of schedulers; what
enqueue_task_fair() does is enqueue the task 'somewhere' in that
hierarchy. It then walks up the hierarchy, checking whether each parent
is already enqueued (the if (se->on_rq) bit). If so, it can stop, since
all further parents will already be enqueued too. However, if the
parent is not already enqueued, it must be enqueued as well.
So what we do is dequeue (sleep) a scheduler from its parent scheduler
when it has no more tasks to run, and enqueue (wakeup) it when a new
'task' arrives. Note that a 'task' can be a so-called super-task, which
is essentially another scheduler.
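The dequeue side is the mirror image. Roughly -- a from-memory sketch,
so treat the details as approximate rather than as the exact 2.6.24
code:

static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
	struct cfs_rq *cfs_rq;
	struct sched_entity *se = &p->se;
	int sleep = flags & DEQUEUE_SLEEP;

	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);
		dequeue_entity(cfs_rq, se, sleep);
		/* stop if this group still has other runnable entities */
		if (cfs_rq->load.weight)
			break;
		/* the group just emptied: detach it from its parent as a sleep */
		sleep = 1;
	}
}

That's the "no more tasks to run" case in code: a group that empties is
put to sleep in its parent scheduler.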
* Re: [RFC][PATCH] fix SCHED_FIFO spec violation (backport)
2008-07-15 8:03 ` Peter Zijlstra
@ 2008-07-15 9:19 ` John Kacur
0 siblings, 0 replies; 8+ messages in thread
From: John Kacur @ 2008-07-15 9:19 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Darren Hart, linux-rt-users, Steven Rostedt, Clark Williams,
Ingo Molnar, Thomas Gleixner, Dmitry Adamushko, Gregory Haskins
On Tue, Jul 15, 2008 at 10:03 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> On Sat, 2008-07-05 at 08:18 -0700, Darren Hart wrote:
>> On Fri, 2008-07-04 at 15:08 +0200, John Kacur wrote:
>
>> > On Fri, Jul 4, 2008 at 12:41 AM, Darren Hart <dvhltc@us.ibm.com> wrote:
>> > > Enqueue deprioritized RT tasks to head of prio array
>> > >
>> > > This patch backports Peter Z's enqueue to head of prio array on
>> > > de-prioritization to 2.6.24.7-rt14 which doesn't have the
>> > > enqueue_rt_entity and associated changes.
>> > >
>> > > I've run several long running real-time java benchmarks and it's
>> > > holding so far. Steven, please consider this patch for inclusion
>> > > in the next 2.6.24.7-rtX release.
>> > >
>> > > Peter, I didn't include your Signed-off-by as only about half your
>> > > original patch applied to 2.6.24.7-r14. If you're happy with this
>> > > version, would you also sign off?
>> > >
>> > > Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
>> > >
>> > >
>> > > ---
>> > > Index: linux-2.6.24.7-ibmrt2.6-view/include/linux/sched.h
>> > > ===================================================================
>> > > --- linux-2.6.24.7-ibmrt2.6-view.orig/include/linux/sched.h
>> > > +++ linux-2.6.24.7-ibmrt2.6-view/include/linux/sched.h
>> > > @@ -897,11 +897,16 @@ struct uts_namespace;
>> > > struct rq;
>> > > struct sched_domain;
>> > >
>> > > +#define ENQUEUE_WAKEUP 0x01
>> > > +#define ENQUEUE_HEAD 0x02
>> > > +
>> > > +#define DEQUEUE_SLEEP 0x01
>> > > +
>> >
>> > Question: is ENQUEUE_WAKEUP equal to DEQUEUE_SLEEP by design or
>> > coincidence?
>>
>> Coincidence. The ENQUEUE_* flags are only to be used with the
>> enqueue_task* methods, while the DEQUEUE_* flags are for dequeue_task*.
>> Note that the conversion of sleep to the DEQUEUE_SLEEP flag isn't really
>> necessary as there is only the one flag, but it makes the calls
>> parallel, which I suspect was Peter's intention (but I speculate here).
>
> Indeed.
>
>> > The renaming of wakeup and sleep to flags makes it at
>> > least superficially seem like they overlap. Since a large part of the
>> > patch is renaming, it might be easier to understand if the renaming
>> > was done as a separate patch, but on the other hand, that is probably
>> > just a PITA. :)
>>
>> Seems a small enough patch to be all in one to me. If others object
>> I'll split it out, but again, I tried to keep the backport as close to
>> Peter's original patch as possible.
>
> I just hacked together what was needed to post an RFC and never made it
> a 'pretty' series ;-)
>
> One could go to the extra length of making it multiple patches, but it's not
> too big, so I'm not sure we should worry about that.
>
>> >
>> > > struct sched_class {
>> > > const struct sched_class *next;
>> > >
>> > > - void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
>> > > - void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
>> > > + void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
>> > > + void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
>> > > void (*yield_task) (struct rq *rq);
>> > > int (*select_task_rq)(struct task_struct *p, int sync);
>> > >
>
>> > > Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched_fair.c
>> > > ===================================================================
>> > > --- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched_fair.c
>> > > +++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched_fair.c
>> > > @@ -756,10 +756,11 @@ static inline struct sched_entity *paren
>> > > * increased. Here we update the fair scheduling stats and
>> > > * then put the task into the rbtree:
>> > > */
>> > > -static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
>> > > +static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>> > > {
>> > > struct cfs_rq *cfs_rq;
>> > > struct sched_entity *se = &p->se;
>> > > + int wakeup = flags & ENQUEUE_WAKEUP;
>> >
>> > Minor nit: was it necessary to create a new int? Why not just flags &=
>> > ENQUEUE_WAKEUP, plus subsequent renaming where necessary?
>
> As Darren already said, the rename of wakeup/sleep to flags is needed
> because of the multiple value thing and consistency.
>
>> Well... I've copied the entire function here for context:
>>
>> static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>> {
>> struct cfs_rq *cfs_rq;
>> struct sched_entity *se = &p->se;
>> int wakeup = flags & ENQUEUE_WAKEUP;
>>
>> for_each_sched_entity(se) {
>> if (se->on_rq)
>> break;
>> cfs_rq = cfs_rq_of(se);
>> enqueue_entity(cfs_rq, se, wakeup);
>> wakeup = 1;
>> }
>> }
>>
>> Note that "wakeup = 1;" applies for all but the initial entity, which uses the
>> flag that was passed in. So if this is correct behavior, then the new
>> integer seems like a reasonable approach to me. Note that the
>> dequeue_task_fair has a parallel implementation.
>>
>> Peter, can you explain why only the first entity is subject to the value
>> of the passed-in flags in these two functions? I understand this was
>> the original behavior as well.
>
> group scheduling :-)
>
> So, if you haven't run away by now, I'll continue explaining.
>
> The current group scheduler is a hierarchy of schedulers; what
> enqueue_task_fair() does is enqueue the task 'somewhere' in that
> hierarchy. It then walks up the hierarchy, checking whether each parent
> is already enqueued (the if (se->on_rq) bit). If so, it can stop, since
> all further parents will already be enqueued too. However, if the
> parent is not already enqueued, it must be enqueued as well.
>
> So what we do is dequeue (sleep) a scheduler from its parent scheduler
> when it has no more tasks to run, and enqueue (wakeup) it when a new
> 'task' arrives. Note that a 'task' can be a so-called super-task, which
> is essentially another scheduler.
>
>
Hey Peter, thanks for the excellent explanations. I think Darren is
waiting for a Signed-off-by from you, since it is a backport of your work.
I was just reviewing it for fun. Darren, if you want to add a
Reviewed-by: John Kacur <jkacur at gmail dot com>
to the patch, please feel free to, but it is not necessary.
* Re: [RFC][PATCH] fix SCHED_FIFO spec violation (backport)
2008-07-03 22:41 [RFC][PATCH] fix SCHED_FIFO spec violation (backport) Darren Hart
2008-07-04 13:08 ` John Kacur
@ 2008-07-15 10:01 ` Peter Zijlstra
1 sibling, 0 replies; 8+ messages in thread
From: Peter Zijlstra @ 2008-07-15 10:01 UTC (permalink / raw)
To: Darren Hart
Cc: linux-rt-users, Steven Rostedt, Clark Williams, Ingo Molnar,
Thomas Gleixner, Dmitry Adamushko, Gregory Haskins, John Kacur
On Thu, 2008-07-03 at 15:41 -0700, Darren Hart wrote:
> Enqueue deprioritized RT tasks to head of prio array
>
> This patch backports Peter Z's enqueue to head of prio array on
> de-prioritization to 2.6.24.7-rt14 which doesn't have the
> enqueue_rt_entity and associated changes.
>
> I've run several long running real-time java benchmarks and it's
> holding so far. Steven, please consider this patch for inclusion
> in the next 2.6.24.7-rtX release.
>
> Peter, I didn't include your Signed-off-by as only about half your
> original patch applied to 2.6.24.7-r14. If you're happy with this
> version, would you also sign off?
Sure thing,
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
(Thanks for pointing me at this, John, I completely missed it.)
> Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
>
>
> ---
> Index: linux-2.6.24.7-ibmrt2.6-view/include/linux/sched.h
> ===================================================================
> --- linux-2.6.24.7-ibmrt2.6-view.orig/include/linux/sched.h
> +++ linux-2.6.24.7-ibmrt2.6-view/include/linux/sched.h
> @@ -897,11 +897,16 @@ struct uts_namespace;
> struct rq;
> struct sched_domain;
>
> +#define ENQUEUE_WAKEUP 0x01
> +#define ENQUEUE_HEAD 0x02
> +
> +#define DEQUEUE_SLEEP 0x01
> +
> struct sched_class {
> const struct sched_class *next;
>
> - void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
> - void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
> + void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
> + void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
> void (*yield_task) (struct rq *rq);
> int (*select_task_rq)(struct task_struct *p, int sync);
>
> Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched.c
> ===================================================================
> --- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched.c
> +++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched.c
> @@ -1046,7 +1046,7 @@ static const u32 prio_to_wmult[40] = {
> /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
> };
>
> -static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
> +static void activate_task(struct rq *rq, struct task_struct *p, int flags);
>
> /*
> * runqueue iterator, to support SMP load-balancing between different
> @@ -1155,16 +1155,16 @@ static void set_load_weight(struct task_
> p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
> }
>
> -static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
> +static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> {
> sched_info_queued(p);
> - p->sched_class->enqueue_task(rq, p, wakeup);
> + p->sched_class->enqueue_task(rq, p, flags);
> p->se.on_rq = 1;
> }
>
> -static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
> +static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
> {
> - p->sched_class->dequeue_task(rq, p, sleep);
> + p->sched_class->dequeue_task(rq, p, flags);
> p->se.on_rq = 0;
> }
>
> @@ -1219,26 +1219,26 @@ static int effective_prio(struct task_st
> /*
> * activate_task - move a task to the runqueue.
> */
> -static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
> +static void activate_task(struct rq *rq, struct task_struct *p, int flags)
> {
> if (p->state == TASK_UNINTERRUPTIBLE)
> rq->nr_uninterruptible--;
>
> ftrace_event_task_activate(p, cpu_of(rq));
> - enqueue_task(rq, p, wakeup);
> + enqueue_task(rq, p, flags);
> inc_nr_running(p, rq);
> }
>
> /*
> * deactivate_task - remove a task from the runqueue.
> */
> -static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
> +static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
> {
> if (p->state == TASK_UNINTERRUPTIBLE)
> rq->nr_uninterruptible++;
>
> ftrace_event_task_deactivate(p, cpu_of(rq));
> - dequeue_task(rq, p, sleep);
> + dequeue_task(rq, p, flags);
> dec_nr_running(p, rq);
> }
>
> @@ -1759,7 +1759,7 @@ out_activate:
> else
> schedstat_inc(p, se.nr_wakeups_remote);
> update_rq_clock(rq);
> - activate_task(rq, p, 1);
> + activate_task(rq, p, ENQUEUE_WAKEUP);
> check_preempt_curr(rq, p);
> success = 1;
>
> @@ -3968,7 +3968,7 @@ asmlinkage void __sched __schedule(void)
> prev->state = TASK_RUNNING;
> } else {
> touch_softlockup_watchdog();
> - deactivate_task(rq, prev, 1);
> + deactivate_task(rq, prev, DEQUEUE_SLEEP);
> }
> switch_count = &prev->nvcsw;
> }
> @@ -4431,7 +4431,7 @@ EXPORT_SYMBOL(sleep_on_timeout);
> void task_setprio(struct task_struct *p, int prio)
> {
> unsigned long flags;
> - int oldprio, prev_resched, on_rq, running;
> + int oldprio, prev_resched, on_rq, running, down;
> struct rq *rq;
> const struct sched_class *prev_class = p->sched_class;
>
> @@ -4472,6 +4472,7 @@ void task_setprio(struct task_struct *p,
> else
> p->sched_class = &fair_sched_class;
>
> + down = (prio > p->prio) ? ENQUEUE_HEAD : 0;
> p->prio = prio;
>
> // trace_special_pid(p->pid, __PRIO(oldprio), PRIO(p));
> @@ -4480,7 +4481,7 @@ void task_setprio(struct task_struct *p,
> if (running)
> p->sched_class->set_curr_task(rq);
> if (on_rq) {
> - enqueue_task(rq, p, 0);
> + enqueue_task(rq, p, down);
> check_class_changed(rq, p, prev_class, oldprio, running);
> }
> // trace_special(prev_resched, _need_resched(), 0);
> Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched_fair.c
> ===================================================================
> --- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched_fair.c
> +++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched_fair.c
> @@ -756,10 +756,11 @@ static inline struct sched_entity *paren
> * increased. Here we update the fair scheduling stats and
> * then put the task into the rbtree:
> */
> -static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
> +static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> {
> struct cfs_rq *cfs_rq;
> struct sched_entity *se = &p->se;
> + int wakeup = flags & ENQUEUE_WAKEUP;
>
> for_each_sched_entity(se) {
> if (se->on_rq)
> @@ -775,10 +776,11 @@ static void enqueue_task_fair(struct rq
> * decreased. We remove the task from the rbtree and
> * update the fair scheduling stats:
> */
> -static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
> +static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> {
> struct cfs_rq *cfs_rq;
> struct sched_entity *se = &p->se;
> + int sleep = flags & DEQUEUE_SLEEP;
>
> for_each_sched_entity(se) {
> cfs_rq = cfs_rq_of(se);
> Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched_idletask.c
> ===================================================================
> --- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched_idletask.c
> +++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched_idletask.c
> @@ -31,7 +31,7 @@ static struct task_struct *pick_next_tas
> * message if some code attempts to do it:
> */
> static void
> -dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
> +dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
> {
> spin_unlock_irq(&rq->lock);
> printk(KERN_ERR "bad: scheduling from the idle thread!\n");
> Index: linux-2.6.24.7-ibmrt2.6-view/kernel/sched_rt.c
> ===================================================================
> --- linux-2.6.24.7-ibmrt2.6-view.orig/kernel/sched_rt.c
> +++ linux-2.6.24.7-ibmrt2.6-view/kernel/sched_rt.c
> @@ -181,11 +181,16 @@ unsigned long rt_nr_uninterruptible_cpu(
> return cpu_rq(cpu)->rt.rt_nr_uninterruptible;
> }
>
> -static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
> +static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
> {
> struct rt_prio_array *array = &rq->rt.active;
>
> - list_add_tail(&p->run_list, array->queue + p->prio);
> +
> + if (unlikely(flags & ENQUEUE_HEAD))
> + list_add(&p->run_list, array->queue + p->prio);
> + else
> + list_add_tail(&p->run_list, array->queue + p->prio);
> +
> __set_bit(p->prio, array->bitmap);
> inc_rt_tasks(p, rq);
>
> @@ -196,7 +201,7 @@ static void enqueue_task_rt(struct rq *r
> /*
> * Adding/removing a task to/from a priority array:
> */
> -static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
> +static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
> {
> struct rt_prio_array *array = &rq->rt.active;
>
> @@ -306,7 +311,7 @@ static void put_prev_task_rt(struct rq *
> #define RT_MAX_TRIES 3
>
> static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
> -static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
> +static void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
>
> static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
> {
>
>
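To see concretely what the ENQUEUE_HEAD branch in enqueue_task_rt()
above buys, here is a toy model (plain userspace C, not kernel code) of
a single priority list. Queued at the head, a deprioritized task is
picked before tasks already waiting at its new priority; queued at the
tail, it would have to wait behind them:

#include <stdio.h>
#include <string.h>

#define MAX_TASKS 8

/* One priority level of the rt prio array, modeled as a simple queue. */
struct prio_list {
	const char *q[MAX_TASKS];
	int n;
};

static void add_tail(struct prio_list *l, const char *t)
{
	l->q[l->n++] = t;
}

static void add_head(struct prio_list *l, const char *t)
{
	memmove(&l->q[1], &l->q[0], l->n * sizeof(l->q[0]));
	l->q[0] = t;
	l->n++;
}

int main(void)
{
	struct prio_list prio = { .n = 0 };

	add_tail(&prio, "waiter-A");	/* already runnable at this prio */
	add_tail(&prio, "waiter-B");
	add_head(&prio, "demoted");	/* the ENQUEUE_HEAD case */

	/* the scheduler picks from the head: demoted, waiter-A, waiter-B */
	for (int i = 0; i < prio.n; i++)
		printf("%s\n", prio.q[i]);
	return 0;
}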
Thread overview: 8+ messages
2008-07-03 22:41 [RFC][PATCH] fix SCHED_FIFO spec violation (backport) Darren Hart
2008-07-04 13:08 ` John Kacur
2008-07-05 15:18 ` Darren Hart
2008-07-07 11:00 ` John Kacur
2008-07-07 15:24 ` Darren Hart
2008-07-15 8:03 ` Peter Zijlstra
2008-07-15 9:19 ` John Kacur
2008-07-15 10:01 ` Peter Zijlstra