From: Raistlin <raistlin@linux.it>
To: Peter Zijlstra <peterz@infradead.org>
Cc: linux-kernel <linux-kernel@vger.kernel.org>,
michael trimarchi <michael@evidence.eu.com>,
Fabio Checconi <fabio@gandalf.sssup.it>,
Ingo Molnar <mingo@elte.hu>, Thomas Gleixner <tglx@linutronix.de>,
Dhaval Giani <dhaval.giani@gmail.com>,
Johan Eker <johan.eker@ericsson.com>,
"p.faure" <p.faure@akatech.ch>,
Chris Friesen <cfriesen@nortel.com>,
Steven Rostedt <rostedt@goodmis.org>,
Henrik Austad <henrik@austad.us>,
Frederic Weisbecker <fweisbec@gmail.com>,
Darren Hart <darren@dvhart.com>,
Sven-Thorsten Dietrich <sven@thebigcorporation.com>,
Bjoern Brandenburg <bbb@cs.unc.edu>,
Tommaso Cucinotta <tommaso.cucinotta@sssup.it>,
"giuseppe.lipari" <giuseppe.lipari@sssup.it>,
Juri Lelli <juri.lelli@gmail.com>
Subject: [RFC 9/12][PATCH] SCHED_DEADLINE: system wide bandwidth management
Date: Fri, 16 Oct 2009 17:45:40 +0200 [thread overview]
Message-ID: <1255707940.6228.464.camel@Palantir> (raw)
In-Reply-To: <1255707324.6228.448.camel@Palantir>
[-- Attachment #1: Type: text/plain, Size: 8513 bytes --]
This commit adds the capability of controlling the maximum, system wide,
CPU bandwidth that is devoted to SCHED_DEADLINE tasks.
This is done by means of two files:
- /proc/sys/kernel/sched_deadline_runtime_us,
- /proc/sys/kernel/sched_deadline_period_us.
The ratio runtime/period is the total bandwidth all the SCHED_DEADLINE tasks
can use in the system as a whole.
Attempts to create tasks that would exceed this limit fail as soon as the
bandwidth cap would be exceeded.
The default is _zero_ bandwidth available, so write some values to those
files before trying to start any SCHED_DEADLINE task. Setting runtime > period
is allowed (i.e., more than 100% bandwidth available for -deadline tasks),
since that makes perfect sense on SMP systems.
Signed-off-by: Raistlin <raistlin@linux.it>
---
include/linux/sched.h | 7 ++
kernel/sched.c | 149 ++++++++++++++++++++++++++++++++++++++++++++++++-
kernel/sysctl.c | 16 +++++
3 files changed, 171 insertions(+), 1 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 478e07c..4de72eb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1984,6 +1984,13 @@ int sched_rt_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
+extern unsigned int sysctl_sched_deadline_period;
+extern int sysctl_sched_deadline_runtime;
+
+int sched_deadline_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
+
extern unsigned int sysctl_sched_compat_yield;
#ifdef CONFIG_RT_MUTEXES
diff --git a/kernel/sched.c b/kernel/sched.c
index 3c3e834..d8b6354 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -870,6 +870,34 @@ static inline u64 global_rt_runtime(void)
return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
}
+/*
+ * deadline_runtime/deadline_period is the maximum bandwidth
+ * -deadline tasks can use. It is system wide, i.e., the sum
+ * of the bandwidths of all the tasks, inside every group and
+ * running on any CPU, has to stay below this value!
+ *
+ * default: 0s (= no bandwidth for -deadline tasks)
+ */
+unsigned int sysctl_sched_deadline_period = 0;
+int sysctl_sched_deadline_runtime = 0;
+
+static inline u64 global_deadline_period(void)
+{
+ return (u64)sysctl_sched_deadline_period * NSEC_PER_USEC;
+}
+
+static inline u64 global_deadline_runtime(void)
+{
+ return (u64)sysctl_sched_deadline_runtime * NSEC_PER_USEC;
+}
+
+/*
+ * locking for the system wide deadline bandwidth management.
+ */
+static DEFINE_MUTEX(deadline_constraints_mutex);
+static DEFINE_SPINLOCK(__sysctl_sched_deadline_lock);
+static u64 __sysctl_sched_deadline_total_bw;
+
#ifndef prepare_arch_switch
# define prepare_arch_switch(next) do { } while (0)
#endif
@@ -2606,6 +2634,66 @@ static unsigned long to_ratio(u64 period, u64 runtime)
return div64_u64(runtime << 20, period);
}
+static inline
+void __deadline_clear_task_bw(struct task_struct *p, u64 tsk_bw)
+{
+ __sysctl_sched_deadline_total_bw -= tsk_bw;
+}
+
+static inline
+void __deadline_add_task_bw(struct task_struct *p, u64 tsk_bw)
+{
+ __sysctl_sched_deadline_total_bw += tsk_bw;
+}
+
+/*
+ * Admission control: returns 1 (and updates the total allocated bandwidth)
+ * if p may enter, leave or re-parametrize SCHED_DEADLINE, 0 otherwise.
+ */
+static int __deadline_check_task_bw(struct task_struct *p, int policy,
+				    struct sched_param_ex *param_ex)
+{
+	u64 bw, tsk_bw = 0;
+	int ret = 0;
+
+	spin_lock(&__sysctl_sched_deadline_lock);
+
+	if (sysctl_sched_deadline_period <= 0)
+		goto unlock;
+
+	bw = to_ratio(sysctl_sched_deadline_period,
+		      sysctl_sched_deadline_runtime);
+	if (bw <= 0)
+		goto unlock;	/* must not return with the lock held */
+
+	if (deadline_policy(policy))
+		tsk_bw = to_ratio(timespec_to_ns(&param_ex->sched_deadline),
+				  timespec_to_ns(&param_ex->sched_runtime));
+
+	/*
+	 * Whether a task enters, leaves, or stays -deadline but changes
+	 * its parameters, we need to update the global allocated
+	 * deadline bandwidth accordingly.
+	 */
+	if (task_has_deadline_policy(p) && !deadline_policy(policy)) {
+		__deadline_clear_task_bw(p, p->dl.bw);
+		ret = 1;
+	} else if (task_has_deadline_policy(p) && deadline_policy(policy) &&
+		   bw >= __sysctl_sched_deadline_total_bw - p->dl.bw + tsk_bw) {
+		__deadline_clear_task_bw(p, p->dl.bw);
+		__deadline_add_task_bw(p, tsk_bw);
+		ret = 1;
+	} else if (deadline_policy(policy) && !task_has_deadline_policy(p) &&
+		   bw >= __sysctl_sched_deadline_total_bw + tsk_bw) {
+		__deadline_add_task_bw(p, tsk_bw);
+		ret = 1;
+	}
+unlock:
+	spin_unlock(&__sysctl_sched_deadline_lock);
+
+	return ret;
+}
+
+
/*
* wake_up_new_task - wake up a newly created task for the first time.
*
@@ -2765,8 +2853,10 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
mmdrop(mm);
if (unlikely(prev_state == TASK_DEAD)) {
/* a deadline task is dying: stop the bandwidth timer */
- if (deadline_task(prev))
+ if (deadline_task(prev)) {
+ __deadline_clear_task_bw(prev, prev->dl.bw);
hrtimer_cancel(&prev->dl.dl_timer);
+ }
/*
* Remove function-return probe instances associated with this
@@ -6372,6 +6462,19 @@ recheck:
spin_unlock_irqrestore(&p->pi_lock, flags);
goto recheck;
}
+ /*
+ * If changing to SCHED_DEADLINE (or changing the parameters of a
+ * SCHED_DEADLINE task) we need to check if enough bandwidth is
+ * available, which might be not true!
+ */
+ if (deadline_policy(policy) || deadline_task(p)) {
+ if (!__deadline_check_task_bw(p, policy, param_ex)) {
+ __task_rq_unlock(rq);
+ spin_unlock_irqrestore(&p->pi_lock, flags);
+ return -EPERM;
+ }
+ }
+
update_rq_clock(rq);
on_rq = p->se.on_rq;
running = task_current(rq, p);
@@ -10569,6 +10672,25 @@ static int sched_rt_global_constraints(void)
}
#endif /* CONFIG_RT_GROUP_SCHED */
+static int sched_deadline_global_constraints(void)
+{
+ u64 bw;
+ int ret = 1;
+
+ spin_lock_irq(&__sysctl_sched_deadline_lock);
+ if (sysctl_sched_deadline_period <= 0)
+ bw = 0;
+ else
+ bw = to_ratio(global_deadline_period(),
+ global_deadline_runtime());
+
+ if (bw < __sysctl_sched_deadline_total_bw)
+ ret = 0;
+ spin_unlock_irq(&__sysctl_sched_deadline_lock);
+
+ return ret;
+}
+
int sched_rt_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
@@ -10599,6 +10721,31 @@ int sched_rt_handler(struct ctl_table *table, int write,
return ret;
}
+int sched_deadline_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ int ret;
+ int old_period, old_runtime;
+
+ mutex_lock(&deadline_constraints_mutex);
+ old_period = sysctl_sched_deadline_period;
+ old_runtime = sysctl_sched_deadline_runtime;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+ if (!ret && write) {
+ if (!sched_deadline_global_constraints()) {
+ sysctl_sched_deadline_period = old_period;
+ sysctl_sched_deadline_runtime = old_runtime;
+ ret = -EINVAL;
+ }
+ }
+ mutex_unlock(&deadline_constraints_mutex);
+
+ return ret;
+}
+
#ifdef CONFIG_CGROUP_SCHED
/* return corresponding task_group object of a cgroup */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0d949c5..34117f9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -373,6 +373,22 @@ static struct ctl_table kern_table[] = {
},
{
.ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_deadline_period_us",
+ .data = &sysctl_sched_deadline_period,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &sched_deadline_handler,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sched_deadline_runtime_us",
+ .data = &sysctl_sched_deadline_runtime,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &sched_deadline_handler,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
.procname = "sched_compat_yield",
.data = &sysctl_sched_compat_yield,
.maxlen = sizeof(unsigned int),
--
1.6.0.4
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
----------------------------------------------------------------------
Dario Faggioli, ReTiS Lab, Scuola Superiore Sant'Anna, Pisa (Italy)
http://blog.linux.it/raistlin / raistlin@ekiga.net /
dario.faggioli@jabber.org
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 197 bytes --]
next prev parent reply other threads:[~2009-10-16 15:46 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-10-16 15:35 [RFC 0/12][PATCH] SCHED_DEADLINE (new version of SCHED_EDF) Raistlin
2009-10-16 15:38 ` [RFC 1/12][PATCH] Extended scheduling parameters structure added Raistlin
2009-12-29 12:15 ` Peter Zijlstra
2010-01-13 10:36 ` Raistlin
2009-10-16 15:40 ` [RFC 0/12][PATCH] SCHED_DEADLINE: core of the scheduling class Raistlin
2009-12-29 12:25 ` Peter Zijlstra
2010-01-13 10:40 ` Dario Faggioli
2009-12-29 12:27 ` Peter Zijlstra
2010-01-13 10:42 ` Raistlin
2009-12-29 14:30 ` Peter Zijlstra
2009-12-29 14:37 ` Peter Zijlstra
2009-12-29 14:40 ` Peter Zijlstra
2010-01-13 16:32 ` Dario Faggioli
2010-01-13 16:47 ` Peter Zijlstra
2009-12-29 14:41 ` Peter Zijlstra
2010-01-13 10:46 ` Raistlin
2009-10-16 15:41 ` [RFC 0/12][PATCH] SCHED_DEADLINE: fork and terminate task logic Raistlin
2009-12-29 15:20 ` Peter Zijlstra
2010-01-13 11:11 ` Raistlin
2010-01-13 16:15 ` Peter Zijlstra
2010-01-13 16:28 ` Dario Faggioli
2010-01-13 21:30 ` Fabio Checconi
2009-10-16 15:41 ` [RFC 0/12][PATCH] SCHED_DEADLINE: added sched_*_ex syscalls Raistlin
2009-10-16 15:42 ` [RFC 0/12][PATCH] SCHED_DEADLINE: added sched-debug support Raistlin
2009-10-16 15:43 ` [RFC 6/12][PATCH] SCHED_DEADLINE: added scheduling latency tracer Raistlin
2009-10-16 15:44 ` [RFC 7/12][PATCH] SCHED_DEADLINE: signal delivery when overrunning Raistlin
2009-12-28 14:19 ` Peter Zijlstra
2010-01-13 9:30 ` Raistlin
2009-10-16 15:44 ` [RFC 8/12][PATCH] SCHED_DEADLINE: wait next instance syscall added Raistlin
2009-12-28 14:30 ` Peter Zijlstra
2010-01-13 9:33 ` Raistlin
2009-10-16 15:45 ` Raistlin [this message]
2009-11-06 11:34 ` [RFC 9/12][PATCH] SCHED_DEADLINE: system wide bandwidth management Dhaval Giani
2009-12-28 14:44 ` Peter Zijlstra
2010-01-13 9:41 ` Raistlin
2009-10-16 15:46 ` [RFC 10/12][PATCH] SCHED_DEADLINE: group bandwidth management code Raistlin
2009-12-28 14:51 ` Peter Zijlstra
2010-01-13 9:46 ` Raistlin
2009-10-16 15:47 ` [RFC 11/12][PATCH] SCHED_DEADLINE: documentation Raistlin
2009-10-16 15:48 ` [RFC 12/12][PATCH] SCHED_DEADLINE: modified sched_*_ex API Raistlin
2009-12-28 15:09 ` Peter Zijlstra
2010-01-13 10:27 ` Raistlin
2010-01-13 16:23 ` Peter Zijlstra
2009-12-29 12:15 ` Peter Zijlstra
2010-01-13 10:33 ` Raistlin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1255707940.6228.464.camel@Palantir \
--to=raistlin@linux.it \
--cc=bbb@cs.unc.edu \
--cc=cfriesen@nortel.com \
--cc=darren@dvhart.com \
--cc=dhaval.giani@gmail.com \
--cc=fabio@gandalf.sssup.it \
--cc=fweisbec@gmail.com \
--cc=giuseppe.lipari@sssup.it \
--cc=henrik@austad.us \
--cc=johan.eker@ericsson.com \
--cc=juri.lelli@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=michael@evidence.eu.com \
--cc=mingo@elte.hu \
--cc=p.faure@akatech.ch \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=sven@thebigcorporation.com \
--cc=tglx@linutronix.de \
--cc=tommaso.cucinotta@sssup.it \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.