From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-kernel@vger.kernel.org
Cc: Stefani Seibold <stefani@seibold.net>,
Dario Faggioli <raistlin@linux.it>,
Nick Piggin <nickpiggin@yahoo.com.au>,
Max Krasnyansky <maxk@qualcomm.com>,
Linus Torvalds <torvalds@linux-foundation.org>,
Thomas Gleixner <tglx@linutronix.de>, Ingo Molnar <mingo@elte.hu>,
Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 5/6] sched: rt-bandwidth fixes
Date: Tue, 19 Aug 2008 12:33:06 +0200 [thread overview]
Message-ID: <20080819103844.376888858@chello.nl> (raw)
In-Reply-To: 20080819103301.787700742@chello.nl
[-- Attachment #1: sched-rt-bw-disable.patch --]
[-- Type: text/plain, Size: 6345 bytes --]
The last patch allows sysctl_sched_rt_runtime to disable bandwidth accounting
for the group scheduler. However, it doesn't deal with sched_setscheduler(),
which still keeps realtime tasks out of groups that have no assigned runtime,
even when bandwidth control is disabled.
If we relax this, we get into the situation where RT tasks can get into a group
without runtime while bandwidth control is disabled, and are then starved when
it is enabled again.
Rework the schedulability code to check for this condition and refuse to turn
on bandwidth control, returning -EBUSY, when this situation is found.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
kernel/sched.c | 125 ++++++++++++++++++++++++++++-----------------------------
1 file changed, 63 insertions(+), 62 deletions(-)
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -300,9 +300,9 @@ static DEFINE_PER_CPU(struct cfs_rq, ini
static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
#endif /* CONFIG_RT_GROUP_SCHED */
-#else /* !CONFIG_FAIR_GROUP_SCHED */
+#else /* !CONFIG_USER_SCHED */
#define root_task_group init_task_group
-#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif /* CONFIG_USER_SCHED */
/* task_group_lock serializes add/remove of task groups and also changes to
* a task group's cpu shares.
@@ -1387,7 +1387,7 @@ static inline void dec_cpu_load(struct r
update_load_sub(&rq->load, load);
}
-#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED))
+#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
typedef int (*tg_visitor)(struct task_group *, void *);
/*
@@ -5082,7 +5082,8 @@ recheck:
* Do not allow realtime tasks into groups that have no runtime
* assigned.
*/
- if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
+ if (rt_bandwidth_enabled() && rt_policy(policy) &&
+ task_group(p)->rt_bandwidth.rt_runtime == 0)
return -EPERM;
#endif
@@ -8707,73 +8708,77 @@ static DEFINE_MUTEX(rt_constraints_mutex
static unsigned long to_ratio(u64 period, u64 runtime)
{
if (runtime == RUNTIME_INF)
- return 1ULL << 16;
+ return 1ULL << 20;
- return div64_u64(runtime << 16, period);
+ return div64_u64(runtime << 20, period);
}
-#ifdef CONFIG_CGROUP_SCHED
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
+/* Must be called with tasklist_lock held */
+static inline int tg_has_rt_tasks(struct task_group *tg)
{
- struct task_group *tgi, *parent = tg->parent;
- unsigned long total = 0;
+ struct task_struct *g, *p;
- if (!parent) {
- if (global_rt_period() < period)
- return 0;
+ do_each_thread(g, p) {
+ if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
+ return 1;
+ } while_each_thread(g, p);
- return to_ratio(period, runtime) <
- to_ratio(global_rt_period(), global_rt_runtime());
- }
+ return 0;
+}
- if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period)
- return 0;
+struct rt_schedulable_data {
+ struct task_group *tg;
+ u64 rt_period;
+ u64 rt_runtime;
+};
- rcu_read_lock();
- list_for_each_entry_rcu(tgi, &parent->children, siblings) {
- if (tgi == tg)
- continue;
+static int tg_schedulable(struct task_group *tg, void *data)
+{
+ struct rt_schedulable_data *d = data;
+ struct task_group *child;
+ unsigned long total, sum = 0;
+ u64 period, runtime;
+
+ period = ktime_to_ns(tg->rt_bandwidth.rt_period);
+ runtime = tg->rt_bandwidth.rt_runtime;
- total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
- tgi->rt_bandwidth.rt_runtime);
+ if (tg == d->tg) {
+ period = d->rt_period;
+ runtime = d->rt_runtime;
}
- rcu_read_unlock();
- return total + to_ratio(period, runtime) <=
- to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
- parent->rt_bandwidth.rt_runtime);
-}
-#elif defined CONFIG_USER_SCHED
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
-{
- struct task_group *tgi;
- unsigned long total = 0;
- unsigned long global_ratio =
- to_ratio(global_rt_period(), global_rt_runtime());
+ if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
+ return -EBUSY;
- rcu_read_lock();
- list_for_each_entry_rcu(tgi, &task_groups, list) {
- if (tgi == tg)
- continue;
+ total = to_ratio(period, runtime);
- total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
- tgi->rt_bandwidth.rt_runtime);
+ list_for_each_entry_rcu(child, &tg->children, siblings) {
+ period = ktime_to_ns(child->rt_bandwidth.rt_period);
+ runtime = child->rt_bandwidth.rt_runtime;
+
+ if (child == d->tg) {
+ period = d->rt_period;
+ runtime = d->rt_runtime;
+ }
+
+ sum += to_ratio(period, runtime);
}
- rcu_read_unlock();
- return total + to_ratio(period, runtime) < global_ratio;
+ if (sum > total)
+ return -EINVAL;
+
+ return 0;
}
-#endif
-/* Must be called with tasklist_lock held */
-static inline int tg_has_rt_tasks(struct task_group *tg)
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
{
- struct task_struct *g, *p;
- do_each_thread(g, p) {
- if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
- return 1;
- } while_each_thread(g, p);
- return 0;
+ struct rt_schedulable_data data = {
+ .tg = tg,
+ .rt_period = period,
+ .rt_runtime = runtime,
+ };
+
+ return walk_tg_tree(tg_schedulable, tg_nop, &data);
}
static int tg_set_bandwidth(struct task_group *tg,
@@ -8783,14 +8788,9 @@ static int tg_set_bandwidth(struct task_
mutex_lock(&rt_constraints_mutex);
read_lock(&tasklist_lock);
- if (rt_runtime == 0 && tg_has_rt_tasks(tg)) {
- err = -EBUSY;
+ err = __rt_schedulable(tg, rt_period, rt_runtime);
+ if (err)
goto unlock;
- }
- if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
- err = -EINVAL;
- goto unlock;
- }
spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
@@ -8867,8 +8867,9 @@ static int sched_rt_global_constraints(v
rt_runtime = tg->rt_bandwidth.rt_runtime;
mutex_lock(&rt_constraints_mutex);
- if (!__rt_schedulable(tg, rt_period, rt_runtime))
- ret = -EINVAL;
+ read_lock(&tasklist_lock);
+ ret = __rt_schedulable(tg, rt_period, rt_runtime);
+ read_unlock(&tasklist_lock);
mutex_unlock(&rt_constraints_mutex);
return ret;
--
next prev parent reply other threads:[~2008-08-19 10:45 UTC|newest]
Thread overview: 72+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-08-19 10:33 [PATCH 0/6] sched: rt-bandwidth fixes Peter Zijlstra
2008-08-19 10:33 ` [PATCH 1/6] sched: rt-bandwidth for user grouping interface Peter Zijlstra
2008-08-19 10:33 ` [PATCH 2/6] sched: rt-bandwidth accounting fix Peter Zijlstra
2008-08-19 18:33 ` Max Krasnyansky
2008-08-19 18:38 ` Peter Zijlstra
2008-08-19 10:33 ` [PATCH 3/6] sched: rt-bandwidth group disable fixes Peter Zijlstra
2008-08-19 10:33 ` [PATCH 4/6] sched: extract walk_tg_tree() Peter Zijlstra
2008-08-19 10:33 ` Peter Zijlstra [this message]
2008-08-19 10:33 ` [PATCH 6/6] sched: disabled rt-bandwidth by default Peter Zijlstra
2008-08-19 11:05 ` Ingo Molnar
2008-08-19 11:11 ` Ingo Molnar
2008-08-19 11:42 ` [PATCH] sched: extract walk_tg_tree(), fix Ingo Molnar
2008-08-19 11:17 ` [PATCH 6/6] sched: disabled rt-bandwidth by default Nick Piggin
2008-08-19 12:59 ` Ingo Molnar
2008-08-19 18:15 ` Max Krasnyansky
2008-08-20 11:56 ` Nick Piggin
2008-08-26 9:00 ` Nick Piggin
2008-08-26 9:30 ` Ingo Molnar
2008-08-26 9:44 ` Nick Piggin
2008-08-26 10:29 ` Ingo Molnar
2008-08-26 11:03 ` Nick Piggin
2008-08-26 9:54 ` Nick Piggin
2008-08-26 11:09 ` Thomas Gleixner
2008-08-26 11:27 ` Nick Piggin
2008-08-26 12:50 ` Theodore Tso
2008-08-26 13:31 ` Stefani Seibold
2008-08-26 17:55 ` Theodore Tso
2008-08-26 21:37 ` Thomas Gleixner
2008-08-26 22:49 ` Andi Kleen
2008-08-27 10:08 ` Nick Piggin
2008-08-28 10:54 ` Ingo Molnar
2008-08-28 11:09 ` Andi Kleen
2008-08-28 11:19 ` Peter Zijlstra
2008-08-28 11:28 ` Ingo Molnar
2008-08-28 11:50 ` Andi Kleen
2008-08-28 12:00 ` Peter Zijlstra
2008-08-28 12:14 ` Andi Kleen
2008-08-28 12:18 ` Nick Piggin
2008-08-28 16:19 ` Max Krasnyansky
2008-08-28 16:25 ` Ingo Molnar
2008-08-28 16:33 ` Andi Kleen
2008-08-28 12:03 ` Nick Piggin
2008-08-28 13:07 ` Ingo Molnar
2008-08-28 13:45 ` Nick Piggin
2008-08-28 12:29 ` Nick Piggin
2008-08-27 10:04 ` Nick Piggin
2008-08-26 13:47 ` Mark Hounschell
2008-08-26 23:00 ` Steven Rostedt
2008-08-27 18:55 ` Chris Friesen
2008-08-28 14:15 ` Steven Rostedt
2008-08-28 14:30 ` Ingo Molnar
2008-08-28 14:36 ` Nick Piggin
2008-08-28 15:12 ` Steven Rostedt
2008-08-28 15:34 ` Nick Piggin
2008-08-28 15:50 ` Steven Rostedt
2008-08-28 17:26 ` Linus Torvalds
2008-08-28 18:04 ` Steven Rostedt
2008-08-28 18:10 ` Darren Hart
2008-08-28 18:16 ` Mark Hounschell
2008-08-28 18:42 ` Linus Torvalds
2008-08-28 18:53 ` Steven Rostedt
2008-08-29 7:56 ` Mike Galbraith
2008-08-29 8:06 ` Peter Zijlstra
2008-08-29 8:47 ` Mike Galbraith
2008-08-28 19:39 ` Stefani Seibold
2008-08-28 20:53 ` Alan Cox
2008-08-30 6:33 ` Nick Piggin
2008-08-28 16:33 ` Max Krasnyansky
2008-08-28 17:22 ` John Kacur
2008-08-28 16:05 ` Peter Zijlstra
2008-08-28 16:15 ` Steven Rostedt
2008-08-28 16:29 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080819103844.376888858@chello.nl \
--to=a.p.zijlstra@chello.nl \
--cc=linux-kernel@vger.kernel.org \
--cc=maxk@qualcomm.com \
--cc=mingo@elte.hu \
--cc=nickpiggin@yahoo.com.au \
--cc=raistlin@linux.it \
--cc=stefani@seibold.net \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.