* [PATCH 0/3] various scheduler patches
@ 2007-12-18 17:41 Peter Zijlstra
2007-12-18 17:41 ` [PATCH 1/3] sched: rt: account the cpu time during the tick Peter Zijlstra
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Peter Zijlstra @ 2007-12-18 17:41 UTC (permalink / raw)
To: linux-kernel; +Cc: mingo
The first patch fixes a serious bug in RT task accounting and should go in .24
The latter two add a RT watchdog rlimit that ensures RT tasks don't
unintentionally hog the cpu.
--
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/3] sched: rt: account the cpu time during the tick
2007-12-18 17:41 [PATCH 0/3] various scheduler patches Peter Zijlstra
@ 2007-12-18 17:41 ` Peter Zijlstra
2007-12-18 17:41 ` [PATCH 2/3] sched: sched_rt_entity Peter Zijlstra
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Peter Zijlstra @ 2007-12-18 17:41 UTC (permalink / raw)
To: linux-kernel; +Cc: mingo, Peter Zijlstra
[-- Attachment #1: sched-rt-update.patch --]
[-- Type: text/plain, Size: 803 bytes --]
Realtime tasks would not account their runtime during ticks. Which would lead
to:
struct sched_param param = { .sched_priority = 10 };
pthread_setschedparam(pthread_self(), SCHED_FIFO, &param);
while (1) ;
Not showing up in top.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/sched_rt.c | 2 ++
1 file changed, 2 insertions(+)
Index: linux-2.6/kernel/sched_rt.c
===================================================================
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -837,6 +837,8 @@ static void prio_changed_rt(struct rq *r
static void task_tick_rt(struct rq *rq, struct task_struct *p)
{
+ update_curr_rt(rq);
+
/*
* RR tasks need a special form of timeslice management.
* FIFO tasks have no timeslices.
--
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 2/3] sched: sched_rt_entity
2007-12-18 17:41 [PATCH 0/3] various scheduler patches Peter Zijlstra
2007-12-18 17:41 ` [PATCH 1/3] sched: rt: account the cpu time during the tick Peter Zijlstra
@ 2007-12-18 17:41 ` Peter Zijlstra
2007-12-18 17:41 ` [PATCH 3/3] sched: SCHED_FIFO/SCHED_RR watchdog timer Peter Zijlstra
2007-12-19 10:18 ` [PATCH 0/3] various scheduler patches Ingo Molnar
3 siblings, 0 replies; 5+ messages in thread
From: Peter Zijlstra @ 2007-12-18 17:41 UTC (permalink / raw)
To: linux-kernel; +Cc: mingo, Peter Zijlstra, Srivatsa Vaddagiri
[-- Attachment #1: sched-rt-entity.patch --]
[-- Type: text/plain, Size: 5533 bytes --]
Move the task_struct members specific to rt scheduling together.
A future optimization could be to put sched_entity and sched_rt_entity
into a union.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
---
include/linux/init_task.h | 5 +++--
include/linux/sched.h | 8 ++++++--
kernel/sched.c | 2 +-
kernel/sched_rt.c | 20 ++++++++++----------
mm/oom_kill.c | 2 +-
5 files changed, 21 insertions(+), 16 deletions(-)
Index: linux-2.6/include/linux/init_task.h
===================================================================
--- linux-2.6.orig/include/linux/init_task.h
+++ linux-2.6/include/linux/init_task.h
@@ -133,9 +133,10 @@ extern struct group_info init_groups;
.nr_cpus_allowed = NR_CPUS, \
.mm = NULL, \
.active_mm = &init_mm, \
- .run_list = LIST_HEAD_INIT(tsk.run_list), \
+ .rt = { \
+ .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \
+ .time_slice = HZ, }, \
.ioprio = 0, \
- .time_slice = HZ, \
.tasks = LIST_HEAD_INIT(tsk.tasks), \
.ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \
.ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -934,6 +934,11 @@ struct sched_entity {
#endif
};
+struct sched_rt_entity {
+ struct list_head run_list;
+ unsigned int time_slice;
+};
+
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;
@@ -950,9 +955,9 @@ struct task_struct {
#endif
int prio, static_prio, normal_prio;
- struct list_head run_list;
const struct sched_class *sched_class;
struct sched_entity se;
+ struct sched_rt_entity rt;
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* list of struct preempt_notifier: */
@@ -977,7 +982,6 @@ struct task_struct {
unsigned int policy;
cpumask_t cpus_allowed;
int nr_cpus_allowed;
- unsigned int time_slice;
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
Index: linux-2.6/kernel/sched_rt.c
===================================================================
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -111,7 +111,7 @@ static void enqueue_task_rt(struct rq *r
{
struct rt_prio_array *array = &rq->rt.active;
- list_add_tail(&p->run_list, array->queue + p->prio);
+ list_add_tail(&p->rt.run_list, array->queue + p->prio);
__set_bit(p->prio, array->bitmap);
inc_cpu_load(rq, p->se.load.weight);
@@ -127,7 +127,7 @@ static void dequeue_task_rt(struct rq *r
update_curr_rt(rq);
- list_del(&p->run_list);
+ list_del(&p->rt.run_list);
if (list_empty(array->queue + p->prio))
__clear_bit(p->prio, array->bitmap);
dec_cpu_load(rq, p->se.load.weight);
@@ -143,7 +143,7 @@ static void requeue_task_rt(struct rq *r
{
struct rt_prio_array *array = &rq->rt.active;
- list_move_tail(&p->run_list, array->queue + p->prio);
+ list_move_tail(&p->rt.run_list, array->queue + p->prio);
}
static void
@@ -212,7 +212,7 @@ static struct task_struct *pick_next_tas
return NULL;
queue = array->queue + idx;
- next = list_entry(queue->next, struct task_struct, run_list);
+ next = list_entry(queue->next, struct task_struct, rt.run_list);
next->se.exec_start = rq->clock;
@@ -261,14 +261,14 @@ static struct task_struct *pick_next_hig
queue = array->queue + idx;
BUG_ON(list_empty(queue));
- next = list_entry(queue->next, struct task_struct, run_list);
+ next = list_entry(queue->next, struct task_struct, rt.run_list);
if (unlikely(pick_rt_task(rq, next, cpu)))
goto out;
if (queue->next->next != queue) {
/* same prio task */
next = list_entry(queue->next->next, struct task_struct,
- run_list);
+ rt.run_list);
if (pick_rt_task(rq, next, cpu))
goto out;
}
@@ -282,7 +282,7 @@ static struct task_struct *pick_next_hig
queue = array->queue + idx;
BUG_ON(list_empty(queue));
- list_for_each_entry(next, queue, run_list) {
+ list_for_each_entry(next, queue, rt.run_list) {
if (pick_rt_task(rq, next, cpu))
goto out;
}
@@ -846,16 +846,16 @@ static void task_tick_rt(struct rq *rq,
if (p->policy != SCHED_RR)
return;
- if (--p->time_slice)
+ if (--p->rt.time_slice)
return;
- p->time_slice = DEF_TIMESLICE;
+ p->rt.time_slice = DEF_TIMESLICE;
/*
* Requeue to the end of queue if we are not the only element
* on the queue:
*/
- if (p->run_list.prev != p->run_list.next) {
+ if (p->rt.run_list.prev != p->rt.run_list.next) {
requeue_task_rt(rq, p);
set_tsk_need_resched(p);
}
Index: linux-2.6/mm/oom_kill.c
===================================================================
--- linux-2.6.orig/mm/oom_kill.c
+++ linux-2.6/mm/oom_kill.c
@@ -286,7 +286,7 @@ static void __oom_kill_task(struct task_
* all the memory it needs. That way it should be able to
* exit() and clear out its resources quickly...
*/
- p->time_slice = HZ;
+ p->rt.time_slice = HZ;
set_tsk_thread_flag(p, TIF_MEMDIE);
force_sig(SIGKILL, p);
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -1685,7 +1685,7 @@ static void __sched_fork(struct task_str
p->se.wait_max = 0;
#endif
- INIT_LIST_HEAD(&p->run_list);
+ INIT_LIST_HEAD(&p->rt.run_list);
p->se.on_rq = 0;
#ifdef CONFIG_PREEMPT_NOTIFIERS
--
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 3/3] sched: SCHED_FIFO/SCHED_RR watchdog timer
2007-12-18 17:41 [PATCH 0/3] various scheduler patches Peter Zijlstra
2007-12-18 17:41 ` [PATCH 1/3] sched: rt: account the cpu time during the tick Peter Zijlstra
2007-12-18 17:41 ` [PATCH 2/3] sched: sched_rt_entity Peter Zijlstra
@ 2007-12-18 17:41 ` Peter Zijlstra
2007-12-19 10:18 ` [PATCH 0/3] various scheduler patches Ingo Molnar
3 siblings, 0 replies; 5+ messages in thread
From: Peter Zijlstra @ 2007-12-18 17:41 UTC (permalink / raw)
To: linux-kernel
Cc: mingo, Peter Zijlstra, Thomas Gleixner, Lennart Poettering,
Michael Kerrisk, Ulrich Drepper
[-- Attachment #1: sched-watchdog.patch --]
[-- Type: text/plain, Size: 4576 bytes --]
Introduce a new rlimit that allows the user to set a runtime timeout on a
real-time task's slice. Once this limit is exceeded the task will receive
SIGXCPU.
So it measures runtime since the last sleep.
Input and ideas by Thomas Gleixner and Lennart Poettering.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Lennart Poettering <mzxreary@0pointer.de>
CC: Michael Kerrisk <mtk.manpages@googlemail.com>
CC: Ulrich Drepper <drepper@redhat.com>
---
include/asm-generic/resource.h | 5 +++--
include/linux/sched.h | 1 +
kernel/posix-cpu-timers.c | 28 ++++++++++++++++++++++++++++
kernel/sched_rt.c | 30 ++++++++++++++++++++++++++++++
4 files changed, 62 insertions(+), 2 deletions(-)
Index: linux-2.6/include/asm-generic/resource.h
===================================================================
--- linux-2.6.orig/include/asm-generic/resource.h
+++ linux-2.6/include/asm-generic/resource.h
@@ -44,8 +44,8 @@
#define RLIMIT_NICE 13 /* max nice prio allowed to raise to
0-39 for nice level 19 .. -20 */
#define RLIMIT_RTPRIO 14 /* maximum realtime priority */
-
-#define RLIM_NLIMITS 15
+#define RLIMIT_RTTIME 15 /* timeout for RT tasks in us */
+#define RLIM_NLIMITS 16
/*
* SuS says limits have to be unsigned.
@@ -86,6 +86,7 @@
[RLIMIT_MSGQUEUE] = { MQ_BYTES_MAX, MQ_BYTES_MAX }, \
[RLIMIT_NICE] = { 0, 0 }, \
[RLIMIT_RTPRIO] = { 0, 0 }, \
+ [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \
}
#endif /* __KERNEL__ */
Index: linux-2.6/kernel/sched_rt.c
===================================================================
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -116,6 +116,9 @@ static void enqueue_task_rt(struct rq *r
inc_cpu_load(rq, p->se.load.weight);
inc_rt_tasks(p, rq);
+
+ if (wakeup)
+ p->rt.timeout = 0;
}
/*
@@ -834,11 +837,38 @@ static void prio_changed_rt(struct rq *r
}
}
+static void watchdog(struct rq *rq, struct task_struct *p)
+{
+ unsigned long soft, hard;
+
+ if (!p->signal)
+ return;
+
+ soft = p->signal->rlim[RLIMIT_RTTIME].rlim_cur;
+ hard = p->signal->rlim[RLIMIT_RTTIME].rlim_max;
+
+ if (soft != RLIM_INFINITY) {
+ unsigned long next;
+
+ p->rt.timeout++;
+ next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
+ if (next > p->rt.timeout) {
+ u64 next_time = p->se.sum_exec_runtime;
+
+ next_time += next * (NSEC_PER_SEC/HZ);
+ if (p->it_sched_expires > next_time)
+ p->it_sched_expires = next_time;
+ } else
+ p->it_sched_expires = p->se.sum_exec_runtime;
+ }
+}
static void task_tick_rt(struct rq *rq, struct task_struct *p)
{
update_curr_rt(rq);
+ watchdog(rq, p);
+
/*
* RR tasks need a special form of timeslice management.
* FIFO tasks have no timeslices.
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -937,6 +937,7 @@ struct sched_entity {
struct sched_rt_entity {
struct list_head run_list;
unsigned int time_slice;
+ unsigned long timeout;
};
struct task_struct {
Index: linux-2.6/kernel/posix-cpu-timers.c
===================================================================
--- linux-2.6.orig/kernel/posix-cpu-timers.c
+++ linux-2.6/kernel/posix-cpu-timers.c
@@ -967,6 +967,7 @@ static void check_thread_timers(struct t
{
int maxfire;
struct list_head *timers = tsk->cpu_timers;
+ struct signal_struct *const sig = tsk->signal;
maxfire = 20;
tsk->it_prof_expires = cputime_zero;
@@ -1011,6 +1012,33 @@ static void check_thread_timers(struct t
t->firing = 1;
list_move_tail(&t->entry, firing);
}
+
+ /*
+ * Check for the special case thread timers.
+ */
+ if (sig->rlim[RLIMIT_RTTIME].rlim_cur != RLIM_INFINITY) {
+ unsigned long hard = sig->rlim[RLIMIT_RTTIME].rlim_max;
+ unsigned long *soft = &sig->rlim[RLIMIT_RTTIME].rlim_cur;
+
+ if (tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
+ /*
+ * At the hard limit, we just die.
+ * No need to calculate anything else now.
+ */
+ __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
+ return;
+ }
+ if (tsk->rt.timeout > DIV_ROUND_UP(*soft, USEC_PER_SEC/HZ)) {
+ /*
+ * At the soft limit, send a SIGXCPU every second.
+ */
+ if (sig->rlim[RLIMIT_RTTIME].rlim_cur
+ < sig->rlim[RLIMIT_RTTIME].rlim_max)
+ sig->rlim[RLIMIT_RTTIME].rlim_cur += USEC_PER_SEC;
+
+ __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
+ }
+ }
}
/*
--
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 0/3] various scheduler patches
2007-12-18 17:41 [PATCH 0/3] various scheduler patches Peter Zijlstra
` (2 preceding siblings ...)
2007-12-18 17:41 ` [PATCH 3/3] sched: SCHED_FIFO/SCHED_RR watchdog timer Peter Zijlstra
@ 2007-12-19 10:18 ` Ingo Molnar
3 siblings, 0 replies; 5+ messages in thread
From: Ingo Molnar @ 2007-12-19 10:18 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: linux-kernel
* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> The first patch fixes a serious bug in RT task accounting and should
> go in .24
>
> The latter two add a RT watchdog rlimit that ensures RT tasks don't
> unintentionally hog the cpu.
thanks, applied all of them - and queued up the first one for .24
merging.
Ingo
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2007-12-19 10:18 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-12-18 17:41 [PATCH 0/3] various scheduler patches Peter Zijlstra
2007-12-18 17:41 ` [PATCH 1/3] sched: rt: account the cpu time during the tick Peter Zijlstra
2007-12-18 17:41 ` [PATCH 2/3] sched: sched_rt_entity Peter Zijlstra
2007-12-18 17:41 ` [PATCH 3/3] sched: SCHED_FIFO/SCHED_RR watchdog timer Peter Zijlstra
2007-12-19 10:18 ` [PATCH 0/3] various scheduler patches Ingo Molnar
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox