public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Raistlin <raistlin@linux.it>
To: Peter Zijlstra <peterz@infradead.org>
Cc: linux-kernel <linux-kernel@vger.kernel.org>,
	michael trimarchi <michael@evidence.eu.com>,
	Fabio Checconi <fabio@gandalf.sssup.it>,
	Ingo Molnar <mingo@elte.hu>, Thomas Gleixner <tglx@linutronix.de>,
	Dhaval Giani <dhaval.giani@gmail.com>,
	Johan Eker <johan.eker@ericsson.com>,
	"p.faure" <p.faure@akatech.ch>,
	Chris Friesen <cfriesen@nortel.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Henrik Austad <henrik@austad.us>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	Darren Hart <darren@dvhart.com>,
	Sven-Thorsten Dietrich <sven@thebigcorporation.com>,
	Bjoern Brandenburg <bbb@cs.unc.edu>,
	Tommaso Cucinotta <tommaso.cucinotta@sssup.it>,
	"giuseppe.lipari" <giuseppe.lipari@sssup.it>,
	Juri Lelli <juri.lelli@gmail.com>
Subject: [RFC 8/12][PATCH] SCHED_DEADLINE: wait next instance syscall added.
Date: Fri, 16 Oct 2009 17:44:58 +0200	[thread overview]
Message-ID: <1255707898.6228.463.camel@Palantir> (raw)
In-Reply-To: <1255707324.6228.448.camel@Palantir>

[-- Attachment #1: Type: text/plain, Size: 8518 bytes --]

This commit introduces another new SCHED_DEADLINE related syscall,
called sched_wait_interval(), whose semantics are close to those of
clock_nanosleep().

However, for SCHED_DEADLINE tasks, it is intended to be the call with which
each job closes its current instance. In that case the task is put to
sleep and, when it wakes up, the scheduler is informed that a new job
has arrived, saving the overhead that would otherwise come with a task
activation to enforce the maximum task bandwidth.

Signed-off-by: Raistlin <raistlin@linux.it>
---
 arch/arm/include/asm/unistd.h      |    1 +
 arch/arm/kernel/calls.S            |    1 +
 arch/x86/ia32/ia32entry.S          |    1 +
 arch/x86/include/asm/unistd_32.h   |    3 +-
 arch/x86/include/asm/unistd_64.h   |    2 +
 arch/x86/kernel/syscall_table_32.S |    1 +
 include/linux/sched.h              |    1 +
 include/linux/syscalls.h           |    3 ++
 kernel/sched.c                     |   71 ++++++++++++++++++++++++++++++++++++
 kernel/sched_deadline.c            |    9 +++++
 10 files changed, 92 insertions(+), 1 deletions(-)

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 09b927e..769ced1 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -394,6 +394,7 @@
 #define __NR_sched_setscheduler_ex	(__NR_SYSCALL_BASE+365)
 #define __NR_sched_setparam_ex		(__NR_SYSCALL_BASE+366)
 #define __NR_sched_getparam_ex		(__NR_SYSCALL_BASE+367)
+#define __NR_sched_wait_interval	(__NR_SYSCALL_BASE+368)
 
 /*
  * The following SWIs are ARM private.
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 42ad362..8292271 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -377,6 +377,7 @@
 /* 365 */	CALL(sys_sched_setscheduler_ex)
 		CALL(sys_sched_setparam_ex)
 		CALL(sys_sched_getparam_ex)
+		CALL(sys_sched_wait_interval)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 3d04691..9306b80 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -845,4 +845,5 @@ ia32_sys_call_table:
 	.quad sys_sched_setscheduler_ex
 	.quad sys_sched_setparam_ex
 	.quad sys_sched_getparam_ex
+	.quad sys_sched_wait_interval		/* 340 */
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 3928c04..63954cb 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -345,10 +345,11 @@
 #define __NR_sched_setscheduler_ex	337
 #define __NR_sched_setparam_ex		338
 #define __NR_sched_getparam_ex		339
+#define __NR_sched_wait_interval	340
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 340
+#define NR_syscalls 341
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 84b0743..63cccc7 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -667,6 +667,8 @@ __SYSCALL(__NR_sched_setscheduler_ex, sys_sched_setscheduler_ex)
 __SYSCALL(__NR_sched_setparam_ex, sys_sched_setparam_ex)
 #define __NR_sched_getparam_ex			301
 __SYSCALL(__NR_sched_getparam_ex, sys_sched_getparam_ex)
+#define __NR_sched_wait_interval		302
+__SYSCALL(__NR_sched_wait_interval, sys_sched_wait_interval)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 38f056c..bd2cc8e 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -339,3 +339,4 @@ ENTRY(sys_call_table)
 	.long sys_sched_setscheduler_ex
 	.long sys_sched_setparam_ex
 	.long sys_sched_getparam_ex
+	.long sys_sched_wait_interval	/* 340 */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 16668f9..478e07c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1088,6 +1088,7 @@ struct sched_class {
 	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
 	void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
 	void (*yield_task) (struct rq *rq);
+	void (*wait_interval) (struct task_struct *p);
 
 	void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index dad0b33..e01f59c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -407,6 +407,9 @@ asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
 asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
 					unsigned long __user *user_mask_ptr);
 asmlinkage long sys_sched_yield(void);
+asmlinkage long sys_sched_wait_interval(int flags,
+					const struct timespec __user *rqtp,
+					struct timespec __user *rmtp);
 asmlinkage long sys_sched_get_priority_max(int policy);
 asmlinkage long sys_sched_get_priority_min(int policy);
 asmlinkage long sys_sched_rr_get_interval(pid_t pid,
diff --git a/kernel/sched.c b/kernel/sched.c
index 2c974fd..3c3e834 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6832,6 +6832,77 @@ SYSCALL_DEFINE0(sched_yield)
 	return 0;
 }
 
+/**
+ * sys_sched_wait_interval - sleep according to the scheduling class rules.
+ * @flags: TIMER_ABSTIME for an absolute @rqtp, 0 for a relative interval.
+ * @rqtp: (user) the interval to sleep, with clock_nanosleep() semantics.
+ * @rmtp: (user, may be NULL) where the remaining time is reported when a
+ *        relative sleep is interrupted by a signal.
+ *
+ * This function makes the task sleep for an absolute or relative interval
+ * (clock_nanosleep semantic). The only difference is that, before stopping
+ * the task, it asks its scheduling class if some class specific logic needs
+ * to be triggered right after the wakeup.
+ */
+SYSCALL_DEFINE3(sched_wait_interval, int, flags,
+		const struct timespec __user *, rqtp,
+		struct timespec __user *, rmtp)
+{
+	struct timespec lrqtp;
+	struct hrtimer_sleeper t;
+	enum hrtimer_mode mode = flags & TIMER_ABSTIME ?
+				 HRTIMER_MODE_ABS : HRTIMER_MODE_REL;
+	int ret = 0;
+
+	if (copy_from_user(&lrqtp, rqtp, sizeof(lrqtp)))
+		return -EFAULT;
+
+	if (!timespec_valid(&lrqtp))
+		return -EINVAL;
+
+	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode);
+	/* use the kernel space copy: rqtp is a user pointer */
+	hrtimer_set_expires(&t.timer, timespec_to_ktime(lrqtp));
+	hrtimer_init_sleeper(&t, current);
+	do {
+		set_current_state(TASK_INTERRUPTIBLE);
+		hrtimer_start_expires(&t.timer, mode);
+		if (!hrtimer_active(&t.timer))
+			t.task = NULL;
+
+		if (likely(t.task)) {
+			/* let the scheduling class prepare the wakeup */
+			if (t.task->sched_class->wait_interval)
+				t.task->sched_class->wait_interval(t.task);
+			schedule();
+		}
+
+		hrtimer_cancel(&t.timer);
+		/* possible restarts use the absolute expiration time */
+		mode = HRTIMER_MODE_ABS;
+	} while (t.task && !signal_pending(current));
+	__set_current_state(TASK_RUNNING);
+
+	if (t.task == NULL)
+		goto out;
+
+	/*
+	 * Absolute timers don't need this to be restarted. Check the
+	 * caller's flags, since mode is clobbered inside the loop above.
+	 */
+	if (flags & TIMER_ABSTIME) {
+		ret = -ERESTARTNOHAND;
+		goto out;
+	}
+
+	/* NOTE(review): an interrupted relative sleep returns 0 here,
+	 * where clock_nanosleep() would return -EINTR -- confirm intended. */
+	if (rmtp) {
+		ktime_t rmt;
+		struct timespec rmt_ts;
+
+		rmt = hrtimer_expires_remaining(&t.timer);
+		/* nothing left to report if the timer already expired */
+		if (rmt.tv64 <= 0)
+			goto out;
+		rmt_ts = ktime_to_timespec(rmt);
+		if (!timespec_valid(&rmt_ts))
+			goto out;
+		/* copy the converted timespec, not the raw ktime_t */
+		if (copy_to_user(rmtp, &rmt_ts, sizeof(*rmtp)))
+			ret = -EFAULT;
+	}
+out:
+	destroy_hrtimer_on_stack(&t.timer);
+	return ret;
+}
+
 static inline int should_resched(void)
 {
 	return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
diff --git a/kernel/sched_deadline.c b/kernel/sched_deadline.c
index 7b57bb0..82c0192 100644
--- a/kernel/sched_deadline.c
+++ b/kernel/sched_deadline.c
@@ -401,6 +401,14 @@ static void yield_task_deadline(struct rq *rq)
 {
 }
 
+/*
+ * sched_class->wait_interval hook: informs the scheduler that the
+ * task's current instance (job) ended.  Setting DL_NEW marks the
+ * entity as starting a fresh job -- presumably the wakeup/enqueue
+ * path refreshes deadline and runtime when it sees this flag (the
+ * DL_NEW consumer is outside this patch; verify against enqueue).
+ */
+static void wait_interval_deadline(struct task_struct *p)
+{
+	p->dl.flags |= DL_NEW;
+}
+
 #ifdef CONFIG_SCHED_HRTICK
 static void start_hrtick_deadline(struct rq *rq, struct task_struct *p)
 {
@@ -538,6 +546,7 @@ static const struct sched_class deadline_sched_class = {
 	.enqueue_task		= enqueue_task_deadline,
 	.dequeue_task		= dequeue_task_deadline,
 	.yield_task		= yield_task_deadline,
+	.wait_interval		= wait_interval_deadline,
 
 	.check_preempt_curr	= check_preempt_curr_deadline,
 
-- 
1.6.0.4


-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
----------------------------------------------------------------------
Dario Faggioli, ReTiS Lab, Scuola Superiore Sant'Anna, Pisa  (Italy)

http://blog.linux.it/raistlin / raistlin@ekiga.net /
dario.faggioli@jabber.org

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

  parent reply	other threads:[~2009-10-16 15:45 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-10-16 15:35 [RFC 0/12][PATCH] SCHED_DEADLINE (new version of SCHED_EDF) Raistlin
2009-10-16 15:38 ` [RFC 1/12][PATCH] Extended scheduling parameters structure added Raistlin
2009-12-29 12:15   ` Peter Zijlstra
2010-01-13 10:36     ` Raistlin
2009-10-16 15:40 ` [RFC 0/12][PATCH] SCHED_DEADLINE: core of the scheduling class Raistlin
2009-12-29 12:25   ` Peter Zijlstra
2010-01-13 10:40     ` Dario Faggioli
2009-12-29 12:27   ` Peter Zijlstra
2010-01-13 10:42     ` Raistlin
2009-12-29 14:30   ` Peter Zijlstra
2009-12-29 14:37     ` Peter Zijlstra
2009-12-29 14:40       ` Peter Zijlstra
2010-01-13 16:32     ` Dario Faggioli
2010-01-13 16:47       ` Peter Zijlstra
2009-12-29 14:41   ` Peter Zijlstra
2010-01-13 10:46     ` Raistlin
2009-10-16 15:41 ` [RFC 0/12][PATCH] SCHED_DEADLINE: fork and terminate task logic Raistlin
2009-12-29 15:20   ` Peter Zijlstra
2010-01-13 11:11     ` Raistlin
2010-01-13 16:15       ` Peter Zijlstra
2010-01-13 16:28         ` Dario Faggioli
2010-01-13 21:30         ` Fabio Checconi
2009-10-16 15:41 ` [RFC 0/12][PATCH] SCHED_DEADLINE: added sched_*_ex syscalls Raistlin
2009-10-16 15:42 ` [RFC 0/12][PATCH] SCHED_DEADLINE: added sched-debug support Raistlin
2009-10-16 15:43 ` [RFC 6/12][PATCH] SCHED_DEADLINE: added scheduling latency tracer Raistlin
2009-10-16 15:44 ` [RFC 7/12][PATCH] SCHED_DEADLINE: signal delivery when overrunning Raistlin
2009-12-28 14:19   ` Peter Zijlstra
2010-01-13  9:30     ` Raistlin
2009-10-16 15:44 ` Raistlin [this message]
2009-12-28 14:30   ` [RFC 8/12][PATCH] SCHED_DEADLINE: wait next instance syscall added Peter Zijlstra
2010-01-13  9:33     ` Raistlin
2009-10-16 15:45 ` [RFC 9/12][PATCH] SCHED_DEADLINE: system wide bandwidth management Raistlin
2009-11-06 11:34   ` Dhaval Giani
2009-12-28 14:44   ` Peter Zijlstra
2010-01-13  9:41     ` Raistlin
2009-10-16 15:46 ` [RFC 10/12][PATCH] SCHED_DEADLINE: group bandwidth management code Raistlin
2009-12-28 14:51   ` Peter Zijlstra
2010-01-13  9:46     ` Raistlin
2009-10-16 15:47 ` [RFC 11/12][PATCH] SCHED_DEADLINE: documentation Raistlin
2009-10-16 15:48 ` [RFC 12/12][PATCH] SCHED_DEADLINE: modified sched_*_ex API Raistlin
2009-12-28 15:09   ` Peter Zijlstra
2010-01-13 10:27     ` Raistlin
2010-01-13 16:23       ` Peter Zijlstra
2009-12-29 12:15   ` Peter Zijlstra
2010-01-13 10:33     ` Raistlin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1255707898.6228.463.camel@Palantir \
    --to=raistlin@linux.it \
    --cc=bbb@cs.unc.edu \
    --cc=cfriesen@nortel.com \
    --cc=darren@dvhart.com \
    --cc=dhaval.giani@gmail.com \
    --cc=fabio@gandalf.sssup.it \
    --cc=fweisbec@gmail.com \
    --cc=giuseppe.lipari@sssup.it \
    --cc=henrik@austad.us \
    --cc=johan.eker@ericsson.com \
    --cc=juri.lelli@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=michael@evidence.eu.com \
    --cc=mingo@elte.hu \
    --cc=p.faure@akatech.ch \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=sven@thebigcorporation.com \
    --cc=tglx@linutronix.de \
    --cc=tommaso.cucinotta@sssup.it \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox