[PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline  Based Interrupt Handlers
       [not found] <e932a33e0910210833j1fab6129m23b55101c5978c4a@mail.gmail.com>
@ 2009-10-21 15:38 ` Soumya K S
  2009-10-22  7:54   ` Raistlin
                     ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Soumya K S @ 2009-10-21 15:38 UTC (permalink / raw)
  To: linux-kernel; +Cc: tglx, mingo

[-- Attachment #1: Type: text/plain, Size: 1156 bytes --]

Hello All,

We would like to present a patch on Deployment specific Real-Time
Linux, topic discussed in LinuxCon 2009
<http://events.linuxfoundation.org/lc09d17>

The developed framework allows user to specify the real-time
strategies for a specific real-time scenario. User specifies
configurations like scheduling policy, Deadline-miss Fault-tolerance
limit, interrupt priorities, etc. Real-time applications use onetime
gateway to notify kernel that they require real-time response. All
applications use existing POSIX APIs. DRTL scheduler is time-aware and
uses EDF as the scheduling policy.

The patch consists of Time aware scheduler having SCHED_EDF as the
scheduling policy, Deadline based scheduling for Interrupt handlers,
Deadline Inheritance support for RT-Mutexes.

The patch is for the kernel version 2.6.32-rc3. It has been tested on
OMAP3530, ATMEL AT91SAM9261 and X86 platforms.

We look forward for support and feedback about
DRTL <http://docs.google.com/fileview?id=0BzLQtQ1qAO7uYjYyN2QwNGUtYWE2YS00MDc1LWExYWUtZTliNjNjNmZiZTZj&hl=en>
and the patch for its feasibility, scalability and performance.

Many Thanks,
Soumya KS
Shubhro Sinha

[-- Attachment #2: README --]
[-- Type: application/octet-stream, Size: 102 bytes --]

Patch for Deployment Specific Real-Time Linux Support
Soumya KS and Shubhro Sinha <ssks.mt@gmail.com>

[-- Attachment #3: drtl_patch_2010_1 --]
[-- Type: application/octet-stream, Size: 22601 bytes --]

diff -Naur linux-2.6.32-rc3/arch/arm/include/asm/signal.h linux-2.6.32-rc3-drtl/arch/arm/include/asm/signal.h
--- linux-2.6.32-rc3/arch/arm/include/asm/signal.h	2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/arch/arm/include/asm/signal.h	2009-10-20 10:40:16.000000000 +0530
@@ -70,6 +70,7 @@
 #define SIGRTMIN	32
 #define SIGRTMAX	_NSIG
 
+#define SIGMISSDEAD	SIGRTMIN + 4
 #define SIGSWI		32
 
 /*
diff -Naur linux-2.6.32-rc3/include/linux/interrupt.h linux-2.6.32-rc3-drtl/include/linux/interrupt.h
--- linux-2.6.32-rc3/include/linux/interrupt.h	2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/include/linux/interrupt.h	2009-10-20 11:55:45.000000000 +0530
@@ -107,6 +107,12 @@
 };
 
 extern irqreturn_t no_action(int cpl, void *dev_id);
+#ifdef CONFIG_SCHED_EDF
+extern int __must_check
+request_irq_edf(unsigned int irq, irq_handler_t handler,irq_handler_t thread_fn, unsigned long flags,
+		const char *name, void *dev, struct timespec *ts);
+#endif
+
 
 #ifdef CONFIG_GENERIC_HARDIRQS
 extern int __must_check
diff -Naur linux-2.6.32-rc3/include/linux/irq.h linux-2.6.32-rc3-drtl/include/linux/irq.h
--- linux-2.6.32-rc3/include/linux/irq.h	2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/include/linux/irq.h	2009-10-20 11:51:27.000000000 +0530
@@ -23,6 +23,7 @@
 #include <linux/errno.h>
 #include <linux/topology.h>
 #include <linux/wait.h>
+#include <linux/ktime.h>
 
 #include <asm/irq.h>
 #include <asm/ptrace.h>
@@ -206,6 +207,8 @@
 	struct proc_dir_entry	*dir;
 #endif
 	const char		*name;
+	ktime_t			deadline;
+	unsigned		edf_flag;
 } ____cacheline_internodealigned_in_smp;
 
 extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
diff -Naur linux-2.6.32-rc3/include/linux/plist.h linux-2.6.32-rc3-drtl/include/linux/plist.h
--- linux-2.6.32-rc3/include/linux/plist.h	2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/include/linux/plist.h	2009-10-20 11:00:31.000000000 +0530
@@ -87,6 +87,8 @@
 
 struct plist_node {
 	int			prio;
+	long long int		deadline;
+	int			policy;
 	struct plist_head	plist;
 };
 
@@ -142,9 +144,11 @@
  * @node:	&struct plist_node pointer
  * @prio:	initial node priority
  */
-static inline void plist_node_init(struct plist_node *node, int prio)
+static inline void plist_node_init(struct plist_node *node, int prio, long long int deadline, int policy)
 {
 	node->prio = prio;
+	node->deadline = deadline;
+	node->policy = policy;
 	plist_head_init(&node->plist, NULL);
 }
 
diff -Naur linux-2.6.32-rc3/include/linux/sched.h linux-2.6.32-rc3-drtl/include/linux/sched.h
--- linux-2.6.32-rc3/include/linux/sched.h	2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/include/linux/sched.h	2009-10-20 11:03:51.000000000 +0530
@@ -36,6 +36,7 @@
 #define SCHED_FIFO		1
 #define SCHED_RR		2
 #define SCHED_BATCH		3
+#define SCHED_EDF		123
 /* SCHED_ISO: reserved but not implemented yet */
 #define SCHED_IDLE		5
 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
@@ -43,9 +44,6 @@
 
 #ifdef __KERNEL__
 
-struct sched_param {
-	int sched_priority;
-};
 
 #include <asm/param.h>	/* for HZ */
 
@@ -102,6 +100,11 @@
 struct bts_context;
 struct perf_event_context;
 
+struct sched_param {
+	int sched_priority;
+	struct timespec deadline;
+};
+
 /*
  * List of flags we want to share for kernel threads,
  * if only because they are not used by them anyway.
@@ -195,6 +198,7 @@
 #define TASK_DEAD		64
 #define TASK_WAKEKILL		128
 #define TASK_WAKING		256
+#define EXIT_MISS_DEADLINE	512	
 
 /* Convenience macros for the sake of set_task_state */
 #define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
@@ -1201,6 +1205,9 @@
 	int nr_cpus_allowed;
 
 	struct sched_rt_entity *back;
+#ifdef CONFIG_SCHED_EDF
+	struct rb_node edf_node;
+#endif
 #ifdef CONFIG_RT_GROUP_SCHED
 	struct sched_rt_entity	*parent;
 	/* rq on which this entity is (to be) queued: */
@@ -1232,6 +1239,11 @@
 	const struct sched_class *sched_class;
 	struct sched_entity se;
 	struct sched_rt_entity rt;
+#ifdef CONFIG_SCHED_EDF
+	ktime_t edf_deadline;
+	ktime_t rt_deadline;
+	struct timespec orig_deadline;
+#endif
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	/* list of struct preempt_notifier: */
@@ -1733,6 +1745,7 @@
 #define PF_EXITING	0x00000004	/* getting shut down */
 #define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
 #define PF_VCPU		0x00000010	/* I'm a virtual CPU */
+#define	PF_HARDIRQ	0x00000020	/* hardirq context */
 #define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
 #define PF_MCE_PROCESS  0x00000080      /* process policy on mce errors */
 #define PF_SUPERPRIV	0x00000100	/* used super-user privileges */
@@ -1932,7 +1945,7 @@
 
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
-extern void rt_mutex_setprio(struct task_struct *p, int prio);
+extern void rt_mutex_setprio(struct task_struct *p, int prio, ktime_t *deadline);
 extern void rt_mutex_adjust_pi(struct task_struct *p);
 #else
 static inline int rt_mutex_getprio(struct task_struct *p)
diff -Naur linux-2.6.32-rc3/init/Kconfig linux-2.6.32-rc3-drtl/init/Kconfig
--- linux-2.6.32-rc3/init/Kconfig	2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/init/Kconfig	2009-10-20 10:40:16.000000000 +0530
@@ -425,6 +425,12 @@
 #
 config HAVE_UNSTABLE_SCHED_CLOCK
 	bool
+	
+config SCHED_EDF
+	bool "EDF Scheduler Support"
+	default n
+	depends on !GROUP_SCHED
+	depends on !SMP
 
 config GROUP_SCHED
 	bool "Group CPU scheduler"
diff -Naur linux-2.6.32-rc3/init/main.c linux-2.6.32-rc3-drtl/init/main.c
--- linux-2.6.32-rc3/init/main.c	2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/init/main.c	2009-10-20 10:40:16.000000000 +0530
@@ -101,6 +101,10 @@
 enum system_states system_state __read_mostly;
 EXPORT_SYMBOL(system_state);
 
+#ifdef CONFIG_SCHED_EDF
+void kthread_deadmiss(void);
+#endif
+
 /*
  * Boot command-line arguments
  */
@@ -428,6 +432,9 @@
 	numa_default_policy();
 	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
 	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
+#ifdef CONFIG_SCHED_EDF
+	kernel_thread(kthread_deadmiss, NULL, CLONE_FS | CLONE_FILES);
+#endif
 	unlock_kernel();
 
 	/*
diff -Naur linux-2.6.32-rc3/kernel/futex.c linux-2.6.32-rc3-drtl/kernel/futex.c
--- linux-2.6.32-rc3/kernel/futex.c	2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/kernel/futex.c	2009-10-20 11:12:18.000000000 +0530
@@ -1384,7 +1384,7 @@
 	 */
 	prio = min(current->normal_prio, MAX_RT_PRIO);
 
-	plist_node_init(&q->list, prio);
+	plist_node_init(&q->list, prio, 0, 0);
 #ifdef CONFIG_DEBUG_PI_LIST
 	q->list.plist.lock = &hb->lock;
 #endif
diff -Naur linux-2.6.32-rc3/kernel/irq/manage.c linux-2.6.32-rc3-drtl/kernel/irq/manage.c
--- linux-2.6.32-rc3/kernel/irq/manage.c	2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/kernel/irq/manage.c	2009-10-20 10:41:06.000000000 +0530
@@ -536,7 +536,16 @@
 	struct irq_desc *desc = irq_to_desc(action->irq);
 	int wake, oneshot = desc->status & IRQ_ONESHOT;
 
-	sched_setscheduler(current, SCHED_FIFO, &param);
+	current->flags |= PF_HARDIRQ;
+	if (desc->edf_flag)
+	{
+		param.deadline.tv_sec = desc->deadline.tv.sec;
+               param.deadline.tv_nsec = desc->deadline.tv.nsec;
+               sched_setscheduler(current, SCHED_EDF, &param);
+	}
+	else 
+		sched_setscheduler(current, SCHED_FIFO, &param);
+
 	current->irqaction = action;
 
 	while (!irq_wait_for_interrupt(action)) {
@@ -1088,3 +1097,18 @@
 	return retval;
 }
 EXPORT_SYMBOL(request_threaded_irq);
+
+#ifdef CONFIG_SCHED_EDF
+int request_irq_edf (unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn,
+                	unsigned long irqflags, const char *devname, void *dev_id, struct timespec *deadline)
+{
+        struct irq_desc *desc;
+        desc = irq_to_desc(irq);
+        desc->deadline.tv.sec = deadline->tv_sec;
+        desc->deadline.tv.nsec = deadline->tv_nsec;
+        desc->edf_flag = 1;
+        return request_threaded_irq (irq, handler,thread_fn, irqflags, devname, dev_id);
+}
+EXPORT_SYMBOL(request_irq_edf);
+#endif
+
diff -Naur linux-2.6.32-rc3/kernel/kthread.c linux-2.6.32-rc3-drtl/kernel/kthread.c
--- linux-2.6.32-rc3/kernel/kthread.c	2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/kernel/kthread.c	2009-10-20 11:54:12.000000000 +0530
@@ -33,6 +33,14 @@
 	struct list_head list;
 };
 
+#ifdef CONFIG_SCHED_EDF
+struct kthread_deadmiss_info {
+	struct completion deadmiss;
+	struct task_struct *k;
+} kthread_dead_info;
+EXPORT_SYMBOL (kthread_dead_info);
+#endif
+
 struct kthread {
 	int should_stop;
 	struct completion exited;
@@ -247,3 +255,30 @@
 
 	return 0;
 }
+
+#ifdef CONFIG_SCHED_EDF
+void dead_miss_default (void)
+{
+	struct task_struct *tsk = current;
+	set_task_comm(current, "Deadmiss Default");
+	/* Try to stop the runaway thread */
+	kthread_stop (kthread_dead_info.k);
+}
+
+void kthread_deadmiss (void)
+{
+	struct task_struct *tsk = current;
+
+        set_task_comm(tsk, "Deadmiss Thread");
+        ignore_signals(tsk);
+        current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
+
+        while (1)
+        {
+                init_completion(&kthread_dead_info.deadmiss);
+                wait_for_completion (&kthread_dead_info.deadmiss);
+		kthread_run(dead_miss_default,NULL,"Deadmiss Default");
+
+        }
+}	
+#endif
diff -Naur linux-2.6.32-rc3/kernel/rtmutex.c linux-2.6.32-rc3-drtl/kernel/rtmutex.c
--- linux-2.6.32-rc3/kernel/rtmutex.c	2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/kernel/rtmutex.c	2009-10-20 11:13:24.000000000 +0530
@@ -112,6 +112,19 @@
 		   task->normal_prio);
 }
 
+#ifdef CONFIG_SCHED_EDF
+ktime_t rt_mutex_getdeadline(struct task_struct *task)
+{
+
+        if (likely(!task_has_pi_waiters(task))) {
+                return task->rt_deadline;
+        }
+        if (ktime_sub((ktime_t)task_top_pi_waiter(task)->pi_list_entry.deadline,task->rt_deadline).tv64 < 0 )
+                return (ktime_t)task_top_pi_waiter(task)->pi_list_entry.deadline;
+        else return task->rt_deadline;
+}
+#endif
+
 /*
  * Adjust the priority of a task, after its pi_waiters got modified.
  *
@@ -122,7 +135,20 @@
 	int prio = rt_mutex_getprio(task);
 
 	if (task->prio != prio)
-		rt_mutex_setprio(task, prio);
+		rt_mutex_setprio(task, prio, NULL);
+
+#ifdef CONFIG_SCHED_EDF
+	else {
+                if ((task_top_pi_waiter(task)->pi_list_entry.policy == SCHED_EDF && task->policy == SCHED_EDF)
+			|| (!task_has_pi_waiters(task)) && task->policy == SCHED_EDF) {
+                        ktime_t deadline = rt_mutex_getdeadline(task);
+                        if (!ktime_equal (deadline, task->edf_deadline))
+                                rt_mutex_setprio(task,prio,&deadline);
+                        return;
+                }
+        }
+#endif
+
 }
 
 /*
@@ -424,8 +450,14 @@
 	__rt_mutex_adjust_prio(task);
 	waiter->task = task;
 	waiter->lock = lock;
-	plist_node_init(&waiter->list_entry, task->prio);
-	plist_node_init(&waiter->pi_list_entry, task->prio);
+#ifdef CONFIG_SCHED_EDF
+	plist_node_init(&waiter->list_entry, current->prio,current->edf_deadline.tv64,current->policy);
+        plist_node_init(&waiter->pi_list_entry, current->prio,current->edf_deadline.tv64,current->policy);
+#else
+	plist_node_init(&waiter->list_entry, task->prio, 0,0);
+	plist_node_init(&waiter->pi_list_entry, task->prio,0,0);
+#endif
+
 
 	/* Get the top priority waiter on the lock */
 	if (rt_mutex_has_waiters(lock))
diff -Naur linux-2.6.32-rc3/kernel/sched.c linux-2.6.32-rc3-drtl/kernel/sched.c
--- linux-2.6.32-rc3/kernel/sched.c	2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/kernel/sched.c	2009-10-20 11:10:33.000000000 +0530
@@ -121,7 +121,7 @@
 
 static inline int rt_policy(int policy)
 {
-	if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR))
+	if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR || policy == SCHED_EDF))
 		return 1;
 	return 0;
 }
@@ -451,6 +451,9 @@
 struct rt_rq {
 	struct rt_prio_array active;
 	unsigned long rt_nr_running;
+#ifdef CONFIG_SCHED_EDF
+	unsigned long edf_running;
+#endif
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
 	struct {
 		int curr; /* highest queued rt task prio */
@@ -479,6 +482,11 @@
 	struct task_group *tg;
 	struct sched_rt_entity *rt_se;
 #endif
+#ifdef CONFIG_SCHED_EDF
+	struct rb_root		edf_root;
+	struct sched_rt_entity	*edf_next;
+
+#endif
 };
 
 #ifdef CONFIG_SMP
@@ -1816,6 +1824,9 @@
 #include "sched_stats.h"
 #include "sched_idletask.c"
 #include "sched_fair.c"
+#ifdef CONFIG_SCHED_EDF
+#include "sched_edf.c"
+#endif
 #include "sched_rt.c"
 #ifdef CONFIG_SCHED_DEBUG
 # include "sched_debug.c"
@@ -2560,8 +2571,12 @@
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
 #endif
-	plist_node_init(&p->pushable_tasks, MAX_PRIO);
 
+#ifdef CONFIG_SCHED_EDF
+	plist_node_init(&p->pushable_tasks, MAX_PRIO, p->edf_deadline.tv64, p->policy);
+#else
+	plist_node_init(&p->pushable_tasks, MAX_PRIO, 0, 0);
+#endif
 	put_cpu();
 }
 
@@ -5942,7 +5957,7 @@
  *
  * Used by the rt_mutex code to implement priority inheritance logic.
  */
-void rt_mutex_setprio(struct task_struct *p, int prio)
+void rt_mutex_setprio(struct task_struct *p, int prio, ktime_t *deadline)
 {
 	unsigned long flags;
 	int oldprio, on_rq, running;
@@ -5966,7 +5981,13 @@
 		p->sched_class = &rt_sched_class;
 	else
 		p->sched_class = &fair_sched_class;
-
+	
+#ifdef CONFIG_SCHED_EDF
+	if (p->policy == SCHED_EDF && deadline != NULL)
+	{
+		p->edf_deadline = *deadline;
+	}
+#endif
 	p->prio = prio;
 
 	if (running)
@@ -6150,6 +6171,7 @@
 		break;
 	case SCHED_FIFO:
 	case SCHED_RR:
+	case SCHED_EDF:
 		p->sched_class = &rt_sched_class;
 		break;
 	}
@@ -6197,7 +6219,7 @@
 		reset_on_fork = !!(policy & SCHED_RESET_ON_FORK);
 		policy &= ~SCHED_RESET_ON_FORK;
 
-		if (policy != SCHED_FIFO && policy != SCHED_RR &&
+		if (policy != SCHED_FIFO && policy != SCHED_RR && policy != SCHED_EDF &&
 				policy != SCHED_NORMAL && policy != SCHED_BATCH &&
 				policy != SCHED_IDLE)
 			return -EINVAL;
@@ -6344,7 +6366,7 @@
 {
 	return __sched_setscheduler(p, policy, param, false);
 }
-
+EXPORT_SYMBOL(sched_setscheduler_nocheck);
 static int
 do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 {
@@ -6361,7 +6383,17 @@
 	retval = -ESRCH;
 	p = find_process_by_pid(pid);
 	if (p != NULL)
+	{	
+#ifdef CONFIG_SCHED_EDF
+		if (policy == SCHED_EDF)
+		{
+			p->edf_deadline = ktime_add(ktime_get(),timespec_to_ktime (lparam.deadline));
+			p->orig_deadline = lparam.deadline;
+			p->rt_deadline = p->edf_deadline; 
+		}
+#endif
 		retval = sched_setscheduler(p, policy, &lparam);
+	}
 	rcu_read_unlock();
 
 	return retval;
@@ -6767,6 +6799,7 @@
 	switch (policy) {
 	case SCHED_FIFO:
 	case SCHED_RR:
+	case SCHED_EDF:
 		ret = MAX_USER_RT_PRIO-1;
 		break;
 	case SCHED_NORMAL:
@@ -6792,6 +6825,7 @@
 	switch (policy) {
 	case SCHED_FIFO:
 	case SCHED_RR:
+	case SCHED_EDF:
 		ret = 1;
 		break;
 	case SCHED_NORMAL:
@@ -9226,7 +9260,11 @@
 {
 	struct rt_prio_array *array;
 	int i;
-
+#ifdef CONFIG_SCHED_EDF
+	rt_rq->edf_root.rb_node = NULL;
+	rt_rq->edf_running = 0;
+	rt_rq->edf_next = NULL;
+#endif
 	array = &rt_rq->active;
 	for (i = 0; i < MAX_RT_PRIO; i++) {
 		INIT_LIST_HEAD(array->queue + i);
diff -Naur linux-2.6.32-rc3/kernel/sched_edf.c linux-2.6.32-rc3-drtl/kernel/sched_edf.c
--- linux-2.6.32-rc3/kernel/sched_edf.c	1970-01-01 05:30:00.000000000 +0530
+++ linux-2.6.32-rc3-drtl/kernel/sched_edf.c	2009-10-20 11:25:24.000000000 +0530
@@ -0,0 +1,116 @@
+#define check_bit(node1, node2) ((node1->rb_parent_color ^ node2->rb_parent_color)&2)
+static inline void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq);
+static inline void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq);
+static int has_equal = 0;
+static struct sched_rt_entity *leftmost = NULL;
+static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se);
+static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se);
+static inline ktime_t edf_se_deadline (struct sched_rt_entity *rt_se)
+{
+	return (rt_task_of(rt_se))->edf_deadline;
+}
+void enqueue_task_edf(struct rq *rq, struct task_struct *p) 
+{
+
+	struct rt_rq *rt_rq = &rq->rt;
+	struct rb_node **link = &rt_rq->edf_root.rb_node, *parent=NULL;
+	struct sched_rt_entity *entry;
+	int leftmost_flag= 1, equal = 0;
+	s64 diff;
+	u8 last_bit = 0;
+	/*
+	* Find the right place in the rbtree:
+        */
+	has_equal = 0;
+	if (p->flags & PF_HARDIRQ)
+		p->edf_deadline = ktime_add(ktime_get(),timespec_to_ktime (p->orig_deadline));
+        while (*link) {
+                parent = *link;
+                entry = rb_entry(parent, struct sched_rt_entity, edf_node);
+                /*
+                 * We dont care about collisions. Nodes with
+                 * the same key stay together.
+                 */
+
+		diff = ktime_sub(p->edf_deadline,edf_se_deadline(entry)).tv64;
+		if (diff < 0) {
+                        link = &parent->rb_left;
+                } else if (diff == 0) {
+                        link = &parent->rb_left;
+			last_bit = (parent->rb_parent_color & 0x02);
+			equal = 1;
+		}
+		else {
+                        link = &parent->rb_right;
+			leftmost_flag = 0;
+                }
+        }
+	rb_link_node (&p->rt.edf_node,parent,link);
+	rb_insert_color(&p->rt.edf_node,&rt_rq->edf_root);
+	if (!equal)
+		last_bit = (parent==NULL)?0x2:~(parent->rb_parent_color & 0x02);
+	p->rt.edf_node.rb_parent_color |= (last_bit & 0x02);
+	if (leftmost_flag)
+	{
+		leftmost = rt_rq->edf_next = &p->rt;
+		if (equal) {
+			has_equal = 1;
+		}
+	}
+	(rt_rq_of_se(&p->rt))->edf_running++;
+	inc_rt_tasks(&p->rt,rt_rq_of_se(&p->rt));
+}
+
+void dequeue_task_edf(struct rq *rq, struct task_struct *p) 
+{
+	struct rb_node *next_node;
+	struct rb_node *prev_node;
+	struct rb_node *assign_node;
+	if (rq->rt.edf_running > 2)
+	{
+		next_node = rb_next(&leftmost->edf_node);
+		if (&p->rt.edf_node == next_node)
+			next_node = rb_next(next_node);
+		else if (&p->rt == leftmost)
+		{
+			leftmost = rb_entry (next_node, struct sched_rt_entity, edf_node);
+			next_node = rb_next(&leftmost->edf_node);
+		}
+		if (&p->rt == rq->rt.edf_next)
+		{
+			rq->rt.edf_next = rb_entry(rb_next(&(rq->rt.edf_next->edf_node)), struct sched_rt_entity, edf_node);
+			if (has_equal && (rq->rt.edf_next == NULL || check_bit((&(p->rt.edf_node)),(&(rq->rt.edf_next->edf_node)))))
+				rq->rt.edf_next = leftmost;
+		}
+		has_equal = !check_bit((&leftmost->edf_node), next_node);
+	}
+	else
+	{
+
+		next_node = rb_next (&p->rt.edf_node);
+		prev_node = rb_prev (&p->rt.edf_node);
+		assign_node = (next_node == NULL) ? prev_node : next_node;
+		if (assign_node != NULL)
+			leftmost = rq->rt.edf_next = rb_entry(assign_node, struct sched_rt_entity, edf_node);
+		else 
+			leftmost = rq->rt.edf_next = NULL;
+		has_equal = 0;
+	}
+	(rt_rq_of_se(&p->rt))->edf_running--;
+	dec_rt_tasks(&p->rt, rt_rq_of_se(&p->rt));
+	rb_erase(&p->rt.edf_node, &rq->rt.edf_root);
+}
+
+struct sched_rt_entity *pick_next_task_edf(struct rq *rq)
+{
+	struct sched_rt_entity *retval;
+	struct rb_node *next_node;
+	retval = rq->rt.edf_next;
+	if (has_equal) 
+	{
+		next_node = rb_next(&retval->edf_node);
+		rq->rt.edf_next = (next_node == NULL || check_bit((&rq->rt.edf_next->edf_node), next_node)) ? leftmost :
+			rb_entry(next_node, struct sched_rt_entity, edf_node);
+	}
+	return retval;
+}
diff -Naur linux-2.6.32-rc3/kernel/sched_rt.c linux-2.6.32-rc3-drtl/kernel/sched_rt.c
--- linux-2.6.32-rc3/kernel/sched_rt.c	2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/kernel/sched_rt.c	2009-10-20 10:40:16.000000000 +0530
@@ -3,6 +3,14 @@
  * policies)
  */
 
+#ifdef CONFIG_SCHED_EDF
+struct kthread_deadmiss_info {
+	struct completion deadmiss;
+	struct task_struct *k;
+};
+extern struct kthread_deadmiss_info kthread_dead_info;
+#endif
+
 #ifdef CONFIG_RT_GROUP_SCHED
 
 #define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
@@ -607,6 +615,7 @@
 	if (unlikely((s64)delta_exec < 0))
 		delta_exec = 0;
 
+
 	schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
@@ -614,7 +623,6 @@
 
 	curr->se.exec_start = rq->clock;
 	cpuacct_charge(curr, delta_exec);
-
 	sched_rt_avg_update(rq, delta_exec);
 
 	if (!rt_bandwidth_enabled())
@@ -885,6 +893,11 @@
 	if (wakeup)
 		rt_se->timeout = 0;
 
+#ifdef CONFIG_SCHED_EDF
+	if (p->policy == SCHED_EDF) 
+		enqueue_task_edf(rq,p);
+	else 
+#endif
 	enqueue_rt_entity(rt_se);
 
 	if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
@@ -896,7 +909,12 @@
 	struct sched_rt_entity *rt_se = &p->rt;
 
 	update_curr_rt(rq);
-	dequeue_rt_entity(rt_se);
+#ifdef CONFIG_SCHED_EDF
+	if (p->policy == SCHED_EDF)
+		dequeue_task_edf(rq,p);
+	else 
+#endif
+		dequeue_rt_entity(rt_se);
 
 	dequeue_pushable_task(rq, p);
 }
@@ -924,6 +942,11 @@
 	struct sched_rt_entity *rt_se = &p->rt;
 	struct rt_rq *rt_rq;
 
+#ifndef CONFIG_SCHED_EDF
+	if (p->policy == SCHED_EDF)
+		return;
+#endif
+
 	for_each_sched_rt_entity(rt_se) {
 		rt_rq = rt_rq_of_se(rt_se);
 		requeue_rt_entity(rt_rq, rt_se, head);
@@ -1036,10 +1059,22 @@
 	int idx;
 
 	idx = sched_find_first_bit(array->bitmap);
+#ifdef CONFIG_SCHED_EDF
+	BUG_ON(!rt_rq->edf_next && idx >= MAX_RT_PRIO);
+#else 
 	BUG_ON(idx >= MAX_RT_PRIO);
+#endif
 
-	queue = array->queue + idx;
-	next = list_entry(queue->next, struct sched_rt_entity, run_list);
+#ifdef CONFIG_SCHED_EDF
+	if (!rt_rq->edf_next || rt_se_prio(rt_rq->edf_next) > idx) {
+#endif
+		queue = array->queue + idx;
+		next = list_entry(queue->next, struct sched_rt_entity, run_list);
+
+#ifdef CONFIG_SCHED_EDF
+	}
+	else next = pick_next_task_edf(rq);
+#endif
 
 	return next;
 }
@@ -1089,11 +1124,30 @@
 	return p;
 }
 
+
 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
 	update_curr_rt(rq);
 	p->se.exec_start = 0;
-
+#ifdef CONFIG_SCHED_EDF
+	/* Deadline miss handler for run away tasks */
+	if (p->policy == SCHED_EDF && !p->flags&PF_HARDIRQ)  {
+            if (ktime_sub(p->edf_deadline,ktime_get()).tv64 <= 0) {
+		if (p->flags&PF_KTHREAD) {
+			dequeue_task_edf (rq,p);
+			kthread_dead_info.k = p;
+			complete (&kthread_dead_info.deadmiss);
+			set_task_state(p, TASK_INTERRUPTIBLE);
+		} 
+		else {
+			sigaddset(&p->pending.signal,SIGMISSDEAD);
+			set_tsk_thread_flag(p, TIF_SIGPENDING);
+			p->exit_code = EXIT_MISS_DEADLINE;
+		}
+	    }
+    	}
+#endif
+	
 	/*
 	 * The previous task needs to be made eligible for pushing
 	 * if it is still active

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline  Based Interrupt Handlers
  2009-10-21 15:38 ` [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers Soumya K S
@ 2009-10-22  7:54   ` Raistlin
  2009-10-22 14:42     ` Soumya K S
  2009-10-22  9:33   ` Claudio Scordino
       [not found]   ` <e932a33e0910220738v3c612d68ve902a770ab0928f4@mail.gmail.com>
  2 siblings, 1 reply; 10+ messages in thread
From: Raistlin @ 2009-10-22  7:54 UTC (permalink / raw)
  To: Soumya K S
  Cc: linux-kernel, tglx, mingo, Dhaval Giani, Peter Zijlstra,
	Thomas Gleixner, Claudio Scordino, michael trimarchi

[-- Attachment #1: Type: text/plain, Size: 3053 bytes --]

On Wed, 2009-10-21 at 21:08 +0530, Soumya K S wrote:
> Hello All,
> 
Hi,

I'm Dario, from Pisa (Italy), and I'm working on EDF scheduling for the
Linux kernel as you do (don't know if you've seen the threads and the
patches, all info here http://gitorious.org/sched_deadline/pages/Home).

> The developed framework allows user to specify the real-time
> strategies for a specific real-time scenario. User specifies
> configurations like scheduling policy, Deadline-miss Fault-tolerance
> limit, interrupt priorities, etc. Real-time applications use onetime
> gateway to notify kernel that they require real-time response. All
> applications use existing POSIX APIs. DRTL scheduler is time-aware and
> uses EDF as the scheduling policy.
> 
Nice, from here, it seemed we were working on very similar things, and I
was wondering if we could somehow collaborate... :-)

Then I looked at the code and I saw our two designs are quite different,
e.g., you don't constraint execution times, don't run any admission test
to avoid oversubscription and you stop deadline missing tasks... Am I
right?
Even from the implementation point of view, I see you didn't used a new
scheduling class.

However, I think there still would be room for collaboration, if you are
interested in...

> The patch consists of Time aware scheduler having SCHED_EDF as the
> scheduling policy, Deadline based scheduling for Interrupt handlers,
> Deadline Inheritance support for RT-Mutexes.
> 
I was very curious on how you dealt with deadline inheritance in SMPs,
than I saw this in your patch:

diff -Naur linux-2.6.32-rc3/init/Kconfig linux-2.6.32-rc3-drtl/init/Kconfig
+config SCHED_EDF
+	bool "EDF Scheduler Support"
+	default n
+	depends on !GROUP_SCHED
+	depends on !SMP

:-P

I'm right in the opposite situation, I've got SMP (partitioned for now,
but we're working on migrations) and also CGROUPS support, but we are
still wondering how deadline (or something more sophisticated, like
bandwidth) inheritance could work in such a case...

Again, I think I see collaboration possibilities, again, if you're
interested in...

> The patch is for the kernel version 2.6.32-rc3. It has been tested on
> OMAP3530, ATMEL AT91SAM9261 and X86 platforms.
> 
Ok... Are you also targeting (or plan to) preempt-rt kernels?

> We look forward for support and feedback about
> DRTL <http://docs.google.com/fileview?id=0BzLQtQ1qAO7uYjYyN2QwNGUtYWE2YS00MDc1LWExYWUtZTliNjNjNmZiZTZj&hl=en>
> and the patch for its feasibility, scalability and performance.
> 
Do you already have any numbers or testcase? I have some (well, a few!)
of them... I'll try to find the time to give it a try to your patch with
them...

Regards,
Dario

-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
----------------------------------------------------------------------
Dario Faggioli, ReTiS Lab, Scuola Superiore Sant'Anna, Pisa  (Italy)

http://blog.linux.it/raistlin / raistlin@ekiga.net /
dario.faggioli@jabber.org

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline  Based Interrupt Handlers
  2009-10-22  7:54   ` Raistlin
@ 2009-10-22 14:42     ` Soumya K S
  2009-10-25  8:46       ` Raistlin
  0 siblings, 1 reply; 10+ messages in thread
From: Soumya K S @ 2009-10-22 14:42 UTC (permalink / raw)
  To: Raistlin
  Cc: linux-kernel, mingo, Dhaval Giani, Peter Zijlstra,
	Thomas Gleixner, Claudio Scordino, michael trimarchi

On Thu, Oct 22, 2009 at 1:24 PM, Raistlin <raistlin@linux.it> wrote:
> On Wed, 2009-10-21 at 21:08 +0530, Soumya K S wrote:
>> Hello All,
>>
> Hi,
>
> I'm Dario, from Pisa (Italy), and I'm working on EDF scheduling for the
> Linux kernel as you do (don't know if you've seen the threads and the
> patches, all info here http://gitorious.org/sched_deadline/pages/Home).
>
Hi Dario, we needed a deadline based scheduler for DRTL and we zeroed
in upon SCHED_EDF. We see that the threads here are fairly new too :-P

>> The developed framework allows user to specify the real-time
>> strategies for a specific real-time scenario. User specifies
>> configurations like scheduling policy, Deadline-miss Fault-tolerance
>> limit, interrupt priorities, etc. Real-time applications use onetime
>> gateway to notify kernel that they require real-time response. All
>> applications use existing POSIX APIs. DRTL scheduler is time-aware and
>> uses EDF as the scheduling policy.
>>
> Nice, from here, it seemed we were working on very similar things, and I
> was wondering if we could somehow collaborate... :-)
>
Sure! That would be good :)

> Then I looked at the code and I saw our two designs are quite different,
> e.g., you don't constraint execution times, don't run any admission test
> to avoid oversubscription and you stop deadline missing tasks... Am I
> right?
> Even from the implementation point of view, I see you didn't used a new
> scheduling class.
>
A simple system where there a few real-time tasks and a few non-real
time tasks, the timelines can be architected out for each real-time
task in the system. In such a case, given the RT bandwidth in the
system, the task with the lowest deadline gets to be scheduled first
till it is there in the system.
In short, for such simple systems, we shifted the burden of admission
control to the architect and kept close to the existing code.

> However, I think there still would be room for collaboration, if you are
> interested in...
>
>> The patch consists of Time aware scheduler having SCHED_EDF as the
>> scheduling policy, Deadline based scheduling for Interrupt handlers,
>> Deadline Inheritance support for RT-Mutexes.
>>
> I was very curious on how you dealt with deadline inheritance in SMPs,
> than I saw this in your patch:
>
> diff -Naur linux-2.6.32-rc3/init/Kconfig linux-2.6.32-rc3-drtl/init/Kconfig
> +config SCHED_EDF
> +       bool "EDF Scheduler Support"
> +       default n
> +       depends on !GROUP_SCHED
> +       depends on !SMP
>
> :-P
>
> I'm right in the opposite situation, I've got SMP (partitioned for now,
> but we're working on migrations) and also CGROUPS support, but we are
> still wondering how deadline (or something more sophisticated, like
> bandwidth) inheritance could work in such a case...
>
That's right, we are still working on SMP and hope there are no
scalability issues in this patch w.r.t SMP.

> Again, I think I see collaboration possibilities, again, if you're
> interested in...
>
Definitely!

>> The patch is for the kernel version 2.6.32-rc3. It has been tested on
>> OMAP3530, ATMEL AT91SAM9261 and X86 platforms.
>>
> Ok... Are you also targeting (or plan to) preempt-rt kernels?
>
Yes, We do have the patches for preempt-rt kernel.

>> We look forward for support and feedback about
>> DRTL <http://docs.google.com/fileview?id=0BzLQtQ1qAO7uYjYyN2QwNGUtYWE2YS00MDc1LWExYWUtZTliNjNjNmZiZTZj&hl=en>
>> and the patch for its feasibility, scalability and performance.
>>
> Do you already have any numbers or testcase? I have some (well, a few!)
> of them... I'll try to find the time to give it a try to your patch with
> them...
>
We have tested Co-operative context switch time, Pre-emptive context
switch time and Interrupt Latency, all of them are of ~130us for
OMAP3530.

Regards,
Soumya
Shubhro

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline  Based Interrupt Handlers
  2009-10-22 14:42     ` Soumya K S
@ 2009-10-25  8:46       ` Raistlin
  2009-10-28 14:15         ` Soumya K S
  0 siblings, 1 reply; 10+ messages in thread
From: Raistlin @ 2009-10-25  8:46 UTC (permalink / raw)
  To: Soumya K S
  Cc: linux-kernel, mingo, Dhaval Giani, Peter Zijlstra,
	Thomas Gleixner, Claudio Scordino, michael trimarchi, Juri Lelli

[-- Attachment #1: Type: text/plain, Size: 5483 bytes --]

On Thu, 2009-10-22 at 20:12 +0530, Soumya K S wrote: 
> Hi Dario, 
Hi again,

> we needed a deadline based scheduler for DRTL and we zeroed
> in upon SCHED_EDF. We see that the threads here are fairly new too :-P
> 
Yes, we are not far from the starting point as well... :-)

> > Nice, from here, it seemed we were working on very similar things, and I
> > was wondering if we could somehow collaborate... :-)
> >
> Sure! That would be good :)
> 
So... I looked at the code and, even if the aim of our two projects are
quite the same, our implementations are very different.

The main difference is the bandwidth reservation thing.
I strongly think that, on a system like Linux, it should be very
important to have --at least as a possibility-- the following features:
- tasks can request for a guaranteed runtime over some time interval
  (bandwidth),
- admission test should guarantee no oversubscription
- bandwidth enforcing must avoid reciprocal tasks interferences.
Maybe we can make the second and third configurable/optional (already
thought about that, and it should be quite easy), but they need to be
there, at least to avoid extending the interface again when we'll
realize they'll needed! :-P

I don't know how much you, and other people here, are familiar with such
idea... We implemented it using one of the many existing algorithm. I
can give you pointers to papers and other implementations of it if
interested.
To keep it short and practical, you can think at it as something similar
to what MacOS-X (and I think Solaris) already have --and actively use,
e.g., for the Jack Audio Connection Kit-- and call
THREAD_TIME_CONSTRAINT (very poor docs, I think, but it gives the big
picture):
http://developer.apple.com/mac/library/documentation/Darwin/Conceptual/KernelProgramming/scheduler/scheduler.html#//apple_ref/doc/uid/TP30000905-CH211-BEHJDFCA

Moreover, as Peter pointed out, coming out with an interface which is
too much tightly tied to EDF, or to any other specific algorithm,
wouldn't be the best idea. What would happen if somewhere in the future
we decide to change from the EDF algorithm to some other deadline based
one?
That's why we changed the name and the interface from _EDF/_edf (yep, it
has been our first choice too! :-P) to _DEADLINE/_deadline, and that's
why I think we should continue striving for even more
interface-algorithm independence.

Obviously, also SMP has to be there! :-P
We have it in fully partitioned right now, but I'll become global (with
support for cpu-affinity) in very short time I hope (Juri, from ReTiS
Lab in Pisa is already working on it).

For rt-mutexes/deadline inheritance, the big issue is that, it works if
you only have EDF, but with a "bandwidth aware scheduler", you need
something more, which is why we don't have it yet... However, I think it
could be a nice starting point.

Finally, I like the idea of deadline IRQ handling and I think it would
be worth to mind it some more. :-)

> > Even from the implementation point of view, I see you didn't used a new
> > scheduling class.
> >
> A simple system where there a few real-time tasks and a few non-real
> time tasks, the timelines can be architected out for each real-time
> task in the system. In such a case, given the RT bandwidth in the
> system, the task with the lowest deadline gets to be scheduled first
> till it is there in the system.
> In short, for such simple systems, we shifted the burden of admission
> control to the architect and kept close to the existing code.
> 
Well, I kind of agree on both, _iff_ you're target is _only_ such small
embedded systems. However, my very humble opinion is that, on Linux, you
need something more general and, since we are talking about real-time,
as much predictable and analyzable as you can get... And I think a
separate scheduling class would be better suited for this. What do you
think?

> > I'm right in the opposite situation, I've got SMP (partitioned for now,
> > but we're working on migrations) and also CGROUPS support, but we are
> > still wondering how deadline (or something more sophisticated, like
> > bandwidth) inheritance could work in such a case...
> >
> That's right, we are still working on SMP and hope there are no
> scalability issues in this patch w.r.t SMP.
> 
Well, I don't know. I guess achieving something similar to what we
already have now (partitioned SMP) should not be impossible even with
your approach... But if you want something different, such has global
(EDF) scheduling, where task can migrate among CPUs according to their
affinity, would but a major headache!! :-O

> > Do you already have any numbers or testcase? I have some (well, a few!)
> > of them... I'll try to find the time to give it a try to your patch with
> > them...
> >
> We have tested Co-operative context switch time, Pre-emptive context
> switch time and Interrupt Latency, all of them are of ~130us for
> OMAP3530.
> 
Mmm... I'm not sure I see why and how your patch should affect context
switches duration... However, do you have the testcases for such tests?

Regards,
Dario

-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
----------------------------------------------------------------------
Dario Faggioli, ReTiS Lab, Scuola Superiore Sant'Anna, Pisa  (Italy)

http://blog.linux.it/raistlin / raistlin@ekiga.net /
dario.faggioli@jabber.org

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline  Based Interrupt Handlers
  2009-10-25  8:46       ` Raistlin
@ 2009-10-28 14:15         ` Soumya K S
  2009-10-28 16:24           ` Raistlin
  0 siblings, 1 reply; 10+ messages in thread
From: Soumya K S @ 2009-10-28 14:15 UTC (permalink / raw)
  To: Raistlin
  Cc: linux-kernel, mingo, Dhaval Giani, Peter Zijlstra,
	Thomas Gleixner, Claudio Scordino, michael trimarchi, Juri Lelli

On Sun, Oct 25, 2009 at 2:16 PM, Raistlin <raistlin@linux.it> wrote:
> On Thu, 2009-10-22 at 20:12 +0530, Soumya K S wrote:
>> Hi Dario,
> Hi again,
>
How you duin...
>> we needed a deadline based scheduler for DRTL and we zeroed
>> in upon SCHED_EDF. We see that the threads here are fairly new too :-P
>>
> Yes, we are not far from the starting point as well... :-)
>
>> > Nice, from here, it seemed we were working on very similar things, and I
>> > was wondering if we could somehow collaborate... :-)
>> >
>> Sure! That would be good :)
>>
> So... I looked at the code and, even if the aim of our two projects are
> quite the same, our implementations are very different.
>
> The main difference is the bandwidth reservation thing.
> I strongly think that, on a system like Linux, it should be very
> important to have --at least as a possibility-- the following features:
> - tasks can request for a guaranteed runtime over some time interval
>  (bandwidth),

We can specify the bandwidth reservation of an RT class and we use the
reservation policy of the RT scheduling class itself. By increasing
the static priority of the EDF task, we can guarantee that EDF tasks
always get the required runtime. If the user puts all his EDF tasks in
priority 1 , only his tasks run. In that case the entire RT bandwidth
is reserved for the EDF tasks. In a way your patch also does the same
thing by placing itself above the RT scheduling class. Only thing what
we don't have in place is partitioning of RT bandwidth across RR/FIFO
and EDF, which right now, we overcome by intelligently placing the
tasks with different policies in different priority levels.
If you are asking bandwidth reservation for guaranteeing determinism,
we definitely have determinism in place, but bandwidth reservation for
other real-time scheduling policies is not in place. This is something
which we can surely work on.

> - admission test should guarantee no oversubscription

So, you are calculating the WCET online in the scheduler right? Can it
calculate the amount of CPU time with the  required preciseness? Here,
you are increasing the enqueue time by adding an O(n) calculation for
every task that you enqueue. That is the reason why for a small
system, pushing this to architect made better sense in terms of
decreased latencies where the turn around time from when the task
enters till it gets the desired result matters, e.g., reading a sensor
2 times in 1ms.

> - bandwidth enforcing must avoid reciprocal tasks interferences.
> Maybe we can make the second and third configurable/optional (already
> thought about that, and it should be quite easy), but they need to be
> there, at least to avoid extending the interface again when we'll
> realize they'll needed! :-P
>
> I don't know how much you, and other people here, are familiar with such
> idea... We implemented it using one of the many existing algorithm. I
> can give you pointers to papers and other implementations of it if
> interested.

Surely, we would like to look into this if you can provide some more pointers.

> To keep it short and practical, you can think at it as something similar
> to what MacOS-X (and I think Solaris) already have --and actively use,
> e.g., for the Jack Audio Connection Kit-- and call
> THREAD_TIME_CONSTRAINT (very poor docs, I think, but it gives the big
> picture):
> http://developer.apple.com/mac/library/documentation/Darwin/Conceptual/KernelProgramming/scheduler/scheduler.html#//apple_ref/doc/uid/TP30000905-CH211-BEHJDFCA
>
> Moreover, as Peter pointed out, coming out with an interface which is
> too much tightly tied to EDF, or to any other specific algorithm,
> wouldn't be the best idea. What would happen if somewhere in the future
> we decide to change from the EDF algorithm to some other deadline based
> one?

Hmm on a lighter note, we would rather say you would just need to
replace 3 functions then :))

> That's why we changed the name and the interface from _EDF/_edf (yep, it
> has been our first choice too! :-P) to _DEADLINE/_deadline, and that's
> why I think we should continue striving for even more
> interface-algorithm independence.
>

True, but we really think its a matter of trade-off between how much
response time you can guarantee for a real-time task v/s how much
scalable you want your design to be. The deterministic response times
that you might have achieved by having all these features might be
good enough (Not sure of your numbers here) in a soft real time
scenario, but wondering if it would meet ends otherwise.

> Obviously, also SMP has to be there! :-P
> We have it in fully partitioned right now, but I'll become global (with
> support for cpu-affinity) in very short time I hope (Juri, from ReTiS
> Lab in Pisa is already working on it).
>
> For rt-mutexes/deadline inheritance, the big issue is that, it works if
> you only have EDF, but with a "bandwidth aware scheduler", you need
> something more, which is why we don't have it yet... However, I think it
> could be a nice starting point.

Yes, we too think without a DI in place, its not a complete real-time solution.

>
> Finally, I like the idea of deadline IRQ handling and I think it would
> be worth to mind it some more. :-)
>

We found many use cases for this feature where real-time tasks have
higher priority than lower priority interrupts generated as a result
of say audio streaming, etc Being able to configure these was
extremely important to maintain the deterministic property of an EDF
task in the system.

>> > Even from the implementation point of view, I see you didn't used a new
>> > scheduling class.
>> >
>> A simple system where there a few real-time tasks and a few non-real
>> time tasks, the timelines can be architected out for each real-time
>> task in the system. In such a case, given the RT bandwidth in the
>> system, the task with the lowest deadline gets to be scheduled first
>> till it is there in the system.
>> In short, for such simple systems, we shifted the burden of admission
>> control to the architect and kept close to the existing code.
>>
> Well, I kind of agree on both, _iff_ you're target is _only_ such small
> embedded systems. However, my very humble opinion is that, on Linux, you
> need something more general and, since we are talking about real-time,
> as much predictable and analyzable as you can get... And I think a
> separate scheduling class would be better suited for this. What do you
> think?
>

Yes, the target was industrial control systems where we needed
deterministic real-time response and also the responsiveness of the
task was critical. Here, the demanding real-time tasks were not too
many (~4/5 at a given point in time) and also, there were other user
tasks which had to update the results of this real-time task remotely.
Hence, we were very vary of introducing latencies in the system.
Instead, we focused on bringing in determinism into the system without
increasing its latency! Also, the concept of a deadline miss handler
was very handy, for a task missing its deadline not to interfere with
the determinism of the other tasks. In this approach, we were able to
meet the demanding response times with determinism in place.
However, I do understand that this approach puts the system designer
in a hot spot! :)

>> > I'm right in the opposite situation, I've got SMP (partitioned for now,
>> > but we're working on migrations) and also CGROUPS support, but we are
>> > still wondering how deadline (or something more sophisticated, like
>> > bandwidth) inheritance could work in such a case...
>> >
>> That's right, we are still working on SMP and hope there are no
>> scalability issues in this patch w.r.t SMP.
>>
> Well, I don't know. I guess achieving something similar to what we
> already have now (partitioned SMP) should not be impossible even with
> your approach... But if you want something different, such has global
> (EDF) scheduling, where task can migrate among CPUs according to their
> affinity, would but a major headache!! :-O
>
>> > Do you already have any numbers or testcase? I have some (well, a few!)
>> > of them... I'll try to find the time to give it a try to your patch with
>> > them...
>> >
>> We have tested Co-operative context switch time, Pre-emptive context
>> switch time and Interrupt Latency, all of them are of ~130us for
>> OMAP3530.
>>
> Mmm... I'm not sure I see why and how your patch should affect context
> switches duration... However, do you have the testcases for such tests?
>

Well we are actually saying that it does _not_ effect the context
switch time :).
We are measuring the time when a task is entered in the system till it
gets scheduled both in preemptive and non-preemptive modes. This
figure does not change even for a loaded system which shows the
deterministic turn around time for a task in terms of scheduling
latencies.

Regards,
Shubhro
Soumya

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline  Based Interrupt Handlers
  2009-10-28 14:15         ` Soumya K S
@ 2009-10-28 16:24           ` Raistlin
  2009-11-10 14:03             ` Soumya K S
  2009-11-10 14:34             ` Peter Zijlstra
  0 siblings, 2 replies; 10+ messages in thread
From: Raistlin @ 2009-10-28 16:24 UTC (permalink / raw)
  To: Soumya K S
  Cc: linux-kernel, mingo, Dhaval Giani, Peter Zijlstra,
	Thomas Gleixner, Claudio Scordino, michael trimarchi, Juri Lelli

[-- Attachment #1: Type: text/plain, Size: 9279 bytes --]

On Wed, 2009-10-28 at 19:45 +0530, Soumya K S wrote:
> > The main difference is the bandwidth reservation thing.
> > I strongly think that, on a system like Linux, it should be very
> > important to have --at least as a possibility-- the following features:
> > - tasks can request for a guaranteed runtime over some time interval
> >  (bandwidth),
> 
> We can specify the bandwidth reservation of an RT class and we use the
> reservation policy of the RT scheduling class itself. 
>
Yes, and all that you can specify is how much bandwidth all the
EDF+FIFO tasks in the system will get. I was talking about something
very different... :-(

> By increasing
> the static priority of the EDF task, we can guarantee that EDF tasks
> always get the required runtime. 
>
Which is not enforced to stay below any kind of value of
deterministic/stochastic worst case execution time nor any kind of
budget which is guaranteed to not being overrun. This means that you
have no way to analyze the system and that you can make no assumptions
about your tasks meeting their deadline or not, about who's going to
miss it, by how far, etc.

You can have an EDF task A executing for more than what you expected (if
you expected something, and you _should_ expect something if you want to
analyze the system at some level, don't you?) and maybe missing its
deadline.

Much more worse, you can have task A executing for more than what you
expected and making task B and/or C and/or WHATEVER missing _their_
deadline, even if they "behave well"... This is far from real-time
guaranteed behaviour, at least in my opinion. :-(

> If the user puts all his EDF tasks in
> priority 1 , only his tasks run. In that case the entire RT bandwidth
> is reserved for the EDF tasks. In a way your patch also does the same
> thing by placing itself above the RT scheduling class. 
>
Agree on this, never said something different. :-)

At least it is well known that deadline tasks have higher priority than
FIFO/RR tasks that have higher priority than OTHER tasks. This, together
with reservation based scheduling at the task (or at least task-group)
level is what make the system analyzable and predictable.

> Only thing what
> we don't have in place is partitioning of RT bandwidth across RR/FIFO
> and EDF, which right now, we overcome by intelligently placing the
> tasks with different policies in different priority levels.
>
I'm not finding the 'intelligent placing' in the patch, so I guess this
is up to the userspace. Providing the userspace with a flexible solution
is something very useful... Relying on userspace to do things
'intelligently' is something I'm not sure I would do, especially in a so
much general purpose OS like Linux, used in so much different contexts.
But, again, that's only my opinion. :-)

If I understood the code well (somme comments here and there would have
helped! :-P) one (or more) EDF task(s) can starve FIFO/RR tasks, which
may happen to me as well. However, it also may happen that one (or more)
FIFO/RR task(s) starve EDF tasks!

Thus, there always could be someone which might be starved and you can't
even say who it will be... Again, this seems lack of determinism to me.

> If you are asking bandwidth reservation for guaranteeing determinism,
> we definitely have determinism in place, but bandwidth reservation for
> other real-time scheduling policies is not in place.
>
See? World is so beautiful because there are so much different possible
opinions and interpretations of the same concepts! :-D :-D

> > - admission test should guarantee no oversubscription
> 
> So, you are calculating the WCET online in the scheduler right? 
>
No, I don't... Did you looked at the code?

> Can it
> calculate the amount of CPU time with the  required preciseness? Here,
> you are increasing the enqueue time by adding an O(n) calculation for
> every task that you enqueue. 
>
No, I don't... Did you looked at the code? :-P

> That is the reason why for a small
> system, pushing this to architect made better sense in terms of
> decreased latencies where the turn around time from when the task
> enters till it gets the desired result matters, e.g., reading a sensor
> 2 times in 1ms.
> 
Given the fact that I do not have anything in the scheduler that
increase latencies and enqueue/dequeue overhead, it sure depends on
you're target, as already said.

You keep saying that for a small system it is up to the system architect
to check if the configuration will be schedulable or not, which may be
reasonable.
What I'm wondering is how this poor guy might do that and hope to have
this enforced by a scheduling policy which allows a task to interfere
with all the other ones to the point of making them missing their
deadlines... And this could happen in your code, since you only have
deadline miss based checks, which may be not enough to prevent it.

> > That's why we changed the name and the interface from _EDF/_edf (yep, it
> > has been our first choice too! :-P) to _DEADLINE/_deadline, and that's
> > why I think we should continue striving for even more
> > interface-algorithm independence.
> >
> True, but we really think its a matter of trade-off between how much
> response time you can guarantee for a real-time task v/s how much
> scalable you want your design to be. 
>
Well, I'm not seeing how trying to have a better interface/algorithm
separation would affect the response time that much... For example, I
don't expect that putting your code in a separate scheduling class would
make you miss some deadline...

> The deterministic response times
> that you might have achieved by having all these features might be
> good enough (Not sure of your numbers here) in a soft real time
> scenario, but wondering if it would meet ends otherwise.
> 
The response time I can achieve with all these features is exactly the
same you can achieve with the current FIFO/RR task, which have more or
less the same features. Actually, the scheduling overhead is even
smaller than in rt tasks since we are still able to enforce bandwidth
without the need of hierarchical scheduling and accounting...

The added feature of being able to asking the scheduler that you don't
want you're task response time, latency and ability to meet its deadline
to be affected by some other task which is running away comes with no
price in therms of response time.

By the way, what numbers do you miss here? Just ask and I'll do my best
to provide them to you...

> Yes, the target was industrial control systems where we needed
> deterministic real-time response and also the responsiveness of the
> task was critical. Here, the demanding real-time tasks were not too
> many (~4/5 at a given point in time) and also, there were other user
> tasks which had to update the results of this real-time task remotely.
> Hence, we were very vary of introducing latencies in the system.
> Instead, we focused on bringing in determinism into the system without
> increasing its latency! 
>
Hey, 'the system' already has a scheduling policy called SCHED_FIFO
which already have _a_lot_ of determinism... and EDF is **not** more
deterministic than fix-priority! There are people that like more EDF
than FP, there are people that like more FP than EDF, they both have
advantages and drawbacks, but implementing EDF can't be claimed as
'bringing determinism'...

So, now I'm curious :-D.

You say you need EDF in that application scenario, which might be more
than true, but the reason can't be 'lack of determinism' since FP
scheduling is as much deterministic as you want/are able to configure it
using the correct priorities... So what was your problem with it?

> Also, the concept of a deadline miss handler
> was very handy, for a task missing its deadline not to interfere with
> the determinism of the other tasks. 
>
Oh, ok. But I think we can agree that you can have a task that, as said
above, not miss its own deadline --and thus you don't catch it-- but
makes all the other tasks in the system to miss their own ones!

How your definition of determinism applies on this situation? :-O

> > Mmm... I'm not sure I see why and how your patch should affect context
> > switches duration... However, do you have the testcases for such tests?
> >
> 
> Well we are actually saying that it does _not_ effect the context
> switch time :).
>
Which was expectable...

> We are measuring the time when a task is entered in the system till it
> gets scheduled both in preemptive and non-preemptive modes. This
> figure does not change even for a loaded system which shows the
> deterministic turn around time for a task in terms of scheduling
> latencies.
> 
... Ok, it seems I need to be more explicit here: do you have the code
of the tests, so that someone else can reproduce them?

Regards,
Dario

-- 
<<This happens because I choose it to happen!>> (Raistlin Majere)
----------------------------------------------------------------------
Dario Faggioli, ReTiS Lab, Scuola Superiore Sant'Anna, Pisa  (Italy)

http://blog.linux.it/raistlin / raistlin@ekiga.net /
dario.faggioli@jabber.org

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline  Based Interrupt Handlers
  2009-10-28 16:24           ` Raistlin
@ 2009-11-10 14:03             ` Soumya K S
  2009-11-10 14:34             ` Peter Zijlstra
  1 sibling, 0 replies; 10+ messages in thread
From: Soumya K S @ 2009-11-10 14:03 UTC (permalink / raw)
  To: Raistlin
  Cc: linux-kernel, mingo, Dhaval Giani, Peter Zijlstra,
	Thomas Gleixner, Claudio Scordino, michael trimarchi, Juri Lelli

On Wed, Oct 28, 2009 at 9:54 PM, Raistlin <raistlin@linux.it> wrote:
> On Wed, 2009-10-28 at 19:45 +0530, Soumya K S wrote:
>> > The main difference is the bandwidth reservation thing.
>> > I strongly think that, on a system like Linux, it should be very
>> > important to have --at least as a possibility-- the following features:
>> > - tasks can request for a guaranteed runtime over some time interval
>> >  (bandwidth),
>>
>> We can specify the bandwidth reservation of an RT class and we use the
>> reservation policy of the RT scheduling class itself.
>>
> Yes, and all that you can specify is how much bandwidth all the
> EDF+FIFO tasks in the system will get. I was talking about something
> very different... :-(
>
>> By increasing
>> the static priority of the EDF task, we can guarantee that EDF tasks
>> always get the required runtime.
>>
> Which is not enforced to stay below any kind of value of
> deterministic/stochastic worst case execution time nor any kind of
> budget which is guaranteed to not being overrun. This means that you
> have no way to analyze the system and that you can make no assumptions
> about your tasks meeting their deadline or not, about who's going to
> miss it, by how far, etc.

We now agree that bandwidth reservation comes in very handy. But we
still feel that making use of RT bandwidth itself and reserving some
for EDF is sufficient. We are working on getting this up. Having said
this, further, let us know if we are missing anything here or we
understood it wrong. According to our understanding, we see that the
Bandwidth reservation + 'budgeting' in your patch has the following
impact on the system:

1. Admission Control made configurable
2. DOMINO effect
3. System analysis
4. Replenishment of deadlines to the task missing their deadlines.

1. Admission Control made configurable

You take in something called as a "budget" and "deadline" from the
user and these parameters decide the sanity of the entire real-time
system.

How does the user provide "budget" ?
- If the budget of a task is its WCET, then admission control works
just fine here.
- If the budget of a task is less than its WCET, admission control
does not have much meaning, right?

So, what we have different in our designs w.r.t how user sees it?

- We depend on user to provide meaningful deadlines. You depend on
user to provide meaningful deadlines + meaningful budget.

- We depend on user / system architect to decide the schedulability of
a task set. You take in the numbers from the user, but calculate it
yourself. OK, that way you have some configurabality!

2. DOMINO effect

You prevent Domino effect by checking the available bandwidth against
the budget, when you see that the task will not finish in the
remaining bandwidth, you push  the task out. you replenish the budget
and the deadline for the task for the next execution cycle. And now, I
guess you are sending a signal to the user.

We prevent this by sending a signal to the user and the default
handler kills the task, thus not affecting the deadlines of the other
tasks.

3. System Analysis

You can analyse the system about your tasks meeting their deadline or
not, about who's going to miss it, by how far "a priori". Also, This
is fair only if your budgeting is done well.

Agree, we cannot do any analysis "prior" to missing its deadline. I
cant yet figure a use case where system analysis is absolutely
required in a real-time scenario while execution, but while trial and
error, this might definitely come in handy. For me, task either misses
a deadline, or successfully completes in-time, probably only
sufficient while in runtime.

4. Replenish deadline of the deadline missed task

Hmm, cant yet understand a good use case of this. If user "wants" to
do this, he can do it from the signal handler itself in our case. But
we still feel doing that yourself without the user knowing it is
changing the very "meaning" of a task deadline!

I guess this comes in use for a periodic task?? Correct me if I am
wrong, but "replenishment of a task deadline" for a non-periodic task
---- doesnt make any sense for its very meaning of a deadline is
lost...

If this is for a periodic task, and "postponing" a given user
_deadline_ is _tolerable_ in the system, why do you need EDF for this?
Wont an FP scheduler take better care in such case??


>
> You can have an EDF task A executing for more than what you expected (if
> you expected something, and you _should_ expect something if you want to
> analyze the system at some level, don't you?) and maybe missing its
> deadline.

When user specifies a deadline, the task should finish within its
deadline, thats his expectation!! About missing its deadline, thats
where the fault handler comes into picture where user also specifes
the time before deadline when the handler is fired..

>
> Much more worse, you can have task A executing for more than what you
> expected and making task B and/or C and/or WHATEVER missing _their_
> deadline, even if they "behave well"... This is far from real-time
> guaranteed behaviour, at least in my opinion. :-(

If task 'A' is getting executed, that means, task A has the earliest
deadline, task B and C cannot miss their deadline just because task
'A' is getting executed -- it 'cant' get executed for more than what
is expected in the system unless user specifies so, remember?
According to my expectation task A should finish within the deadline.
If it does not, it depends upon the user what to do with the task like
increase the deadline, or terminate it. We send a signal to the
process intimating the same. And if every task behaves well and all
the task parameters are "respected during actual execution" I don't
see any reason why other tasks would miss their deadline if the task
set is proper in the system.
>
>> If the user puts all his EDF tasks in
>> priority 1 , only his tasks run. In that case the entire RT bandwidth
>> is reserved for the EDF tasks. In a way your patch also does the same
>> thing by placing itself above the RT scheduling class.
>>
> Agree on this, never said something different. :-)
>
> At least it is well known that deadline tasks have higher priority than
> FIFO/RR tasks that have higher priority than OTHER tasks. This, together
> with reservation based scheduling at the task (or at least task-group)
> level is what make the system analyzable and predictable.
>

ya ya.. analysable is something we lack here !! totally agreed..... :)

>> Only thing what
>> we don't have in place is partitioning of RT bandwidth across RR/FIFO
>> and EDF, which right now, we overcome by intelligently placing the
>> tasks with different policies in different priority levels.
>>
> I'm not finding the 'intelligent placing' in the patch, so I guess this
> is up to the userspace. Providing the userspace with a flexible solution
> is something very useful... Relying on userspace to do things
> 'intelligently' is something I'm not sure I would do, especially in a so
> much general purpose OS like Linux, used in so much different contexts.
> But, again, that's only my opinion. :-)

Hmm I guess you too are "totally" dependent on user giving you the
right parameters _intelligently_ (deadline / budget)... I guess we are
not too different there expecting the users to be _aware_ ..!

>
> If I understood the code well (somme comments here and there would have
> helped! :-P) one (or more) EDF task(s) can starve FIFO/RR tasks, which
> may happen to me as well. However, it also may happen that one (or more)
> FIFO/RR task(s) starve EDF tasks!
>
> Thus, there always could be someone which might be starved and you can't
> even say who it will be... Again, this seems lack of determinism to me.

Right, but now that we agree on Bandwidth reservation with RT class,
this will be thankfully, resolved.

>
>> If you are asking bandwidth reservation for guaranteeing determinism,
>> we definitely have determinism in place, but bandwidth reservation for
>> other real-time scheduling policies is not in place.
>>
> See? World is so beautiful because there are so much different possible
> opinions and interpretations of the same concepts! :-D :-D
>

Differences in Language and communication makes it even more beautiful!!!! ;-)

>> > - admission test should guarantee no oversubscription
>>
>> So, you are calculating the WCET online in the scheduler right?
>>
> No, I don't... Did you looked at the code?
>
>> Can it
>> calculate the amount of CPU time with the  required preciseness? Here,
>> you are increasing the enqueue time by adding an O(n) calculation for
>> every task that you enqueue.
>>
> No, I don't... Did you looked at the code? :-P
>
>> That is the reason why for a small
>> system, pushing this to architect made better sense in terms of
>> decreased latencies where the turn around time from when the task
>> enters till it gets the desired result matters, e.g., reading a sensor
>> 2 times in 1ms.
>>
> Given the fact that I do not have anything in the scheduler that
> increase latencies and enqueue/dequeue overhead, it sure depends on
> you're target, as already said.
>
> You keep saying that for a small system it is up to the system architect
> to check if the configuration will be schedulable or not, which may be
> reasonable.
> What I'm wondering is how this poor guy might do that and hope to have
> this enforced by a scheduling policy which allows a task to interfere
> with all the other ones to the point of making them missing their
> deadlines... And this could happen in your code, since you only have
> deadline miss based checks, which may be not enough to prevent it.
>

Well, this in your case depends on the sanity of the 'budget" the user
provides, the same way it depends on the sanity of the "deadline" the
user provides. And, _NO_, a task missing its deadline _cannot_ make
other tasks miss their deadline too if the deadlines given are sane.

>> > That's why we changed the name and the interface from _EDF/_edf (yep, it
>> > has been our first choice too! :-P) to _DEADLINE/_deadline, and that's
>> > why I think we should continue striving for even more
>> > interface-algorithm independence.
>> >
>> True, but we really think its a matter of trade-off between how much
>> response time you can guarantee for a real-time task v/s how much
>> scalable you want your design to be.
>>
> Well, I'm not seeing how trying to have a better interface/algorithm
> separation would affect the response time that much... For example, I
> don't expect that putting your code in a separate scheduling class would
> make you miss some deadline...
>
>> The deterministic response times
>> that you might have achieved by having all these features might be
>> good enough (Not sure of your numbers here) in a soft real time
>> scenario, but wondering if it would meet ends otherwise.
>>
> The response time I can achieve with all these features is exactly the
> same you can achieve with the current FIFO/RR task, which have more or
> less the same features. Actually, the scheduling overhead is even
> smaller than in rt tasks since we are still able to enforce bandwidth
> without the need of hierarchical scheduling and accounting...
>
> The added feature of being able to asking the scheduler that you don't
> want you're task response time, latency and ability to meet its deadline
> to be affected by some other task which is running away comes with no
> price in therms of response time.
>
> By the way, what numbers do you miss here? Just ask and I'll do my best
> to provide them to you...
>
>> Yes, the target was industrial control systems where we needed
>> deterministic real-time response and also the responsiveness of the
>> task was critical. Here, the demanding real-time tasks were not too
>> many (~4/5 at a given point in time) and also, there were other user
>> tasks which had to update the results of this real-time task remotely.
>> Hence, we were very vary of introducing latencies in the system.
>> Instead, we focused on bringing in determinism into the system without
>> increasing its latency!
>>
> Hey, 'the system' already has a scheduling policy called SCHED_FIFO
> which already have _a_lot_ of determinism... and EDF is **not** more
> deterministic than fix-priority! There are people that like more EDF
> than FP, there are people that like more FP than EDF, they both have
> advantages and drawbacks, but implementing EDF can't be claimed as
> 'bringing determinism'...
>
> So, now I'm curious :-D.
>
> You say you need EDF in that application scenario, which might be more
> than true, but the reason can't be 'lack of determinism' since FP
> scheduling is as much deterministic as you want/are able to configure it
> using the correct priorities... So what was your problem with it?
>
>> Also, the concept of a deadline miss handler
>> was very handy, for a task missing its deadline not to interfere with
>> the determinism of the other tasks.
>>
> Oh, ok. But I think we can agree that you can have a task that, as said
> above, not miss its own deadline --and thus you don't catch it-- but
> makes all the other tasks in the system to miss their own ones!
>
> How your definition of determinism applies on this situation? :-O

If the task deadlines are proper and "respected" during actual
execution, and user supplies correct deadlines to the tasks I donot
see any reason why the system is not deterministic!!
>
>> > Mmm... I'm not sure I see why and how your patch should affect context
>> > switches duration... However, do you have the testcases for such tests?
>> >
>>
>> Well we are actually saying that it does _not_ effect the context
>> switch time :).
>>
> Which was expectable...
>
>> We are measuring the time when a task is entered in the system till it
>> gets scheduled both in preemptive and non-preemptive modes. This
>> figure does not change even for a loaded system which shows the
>> deterministic turn around time for a task in terms of scheduling
>> latencies.
>>
> ... Ok, it seems I need to be more explicit here: do you have the code
> of the tests, so that someone else can reproduce them?
>
It measures the time from the dequeue of the previous task to the
scheduling of the next task in the queue. Just variables to catch
times in the kernel and the user app would do..

Regards,
Shubhro
Soumya

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline  Based Interrupt Handlers
  2009-10-28 16:24           ` Raistlin
  2009-11-10 14:03             ` Soumya K S
@ 2009-11-10 14:34             ` Peter Zijlstra
  1 sibling, 0 replies; 10+ messages in thread
From: Peter Zijlstra @ 2009-11-10 14:34 UTC (permalink / raw)
  To: Raistlin
  Cc: Soumya K S, linux-kernel, mingo, Dhaval Giani, Thomas Gleixner,
	Claudio Scordino, michael trimarchi, Juri Lelli

On Wed, 2009-10-28 at 17:24 +0100, Raistlin wrote:
> Relying on userspace to do things
> 'intelligently' is something I'm not sure I would do, especially in a so
> much general purpose OS like Linux, used in so much different contexts.
> But, again, that's only my opinion. :-) 

I'd put it even stronger, relying on userspace to do something
intelligent is utterly stupid. We have to assume userspace is hostile
and out to get you.

Therefore I fully support the model that puts admission control into the
kernel, because that provides isolation between, and guarantees to
applications.

Not providing this, and having no overload protection is one of the
biggest failures of SCHED_FIFO/RR.

On Tue, 2009-11-10 at 19:33 +0530, Soumya K S wrote:
> Hmm I guess you too are "totally" dependent on user giving you the
> right parameters _intelligently_ (deadline / budget)... I guess we are
> not too different there expecting the users to be _aware_ ..!

The difference is that if A messes up its own parameters only A suffers
and the rest of the system continues to work as expected.

With your approach anything is out the window.

So yes, userspace needs to be aware, but a task can only screw itself,
not everybody else, which is a very important feature.

@ DRTL folks:

If you want deadline based scheduling (it appears you do) I suggest you
start helping out Dario, your proposal isn't going to happen.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers
  2009-10-21 15:38 ` [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers Soumya K S
  2009-10-22  7:54   ` Raistlin
@ 2009-10-22  9:33   ` Claudio Scordino
       [not found]   ` <e932a33e0910220738v3c612d68ve902a770ab0928f4@mail.gmail.com>
  2 siblings, 0 replies; 10+ messages in thread
From: Claudio Scordino @ 2009-10-22  9:33 UTC (permalink / raw)
  To: Soumya K S; +Cc: linux-kernel, tglx, mingo, Peter Zijlstra

Soumya K S ha scritto:
> Hello All,
> 
> We would like to present a patch on Deployment specific Real-Time
> Linux, topic discussed in LinuxCon 2009
> <http://events.linuxfoundation.org/lc09d17>
> 
> The developed framework allows user to specify the real-time
> strategies for a specific real-time scenario. User specifies
> configurations like scheduling policy, Deadline-miss Fault-tolerance
> limit, interrupt priorities, etc. Real-time applications use onetime
> gateway to notify kernel that they require real-time response. All
> applications use existing POSIX APIs. DRTL scheduler is time-aware and
> uses EDF as the scheduling policy.
> 
> The patch consists of Time aware scheduler having SCHED_EDF as the
> scheduling policy, Deadline based scheduling for Interrupt handlers,
> Deadline Inheritance support for RT-Mutexes.
> 
> The patch is for the kernel version 2.6.32-rc3. It has been tested on
> OMAP3530, ATMEL AT91SAM9261 and X86 platforms.
> 
> We look forward for support and feedback about
> DRTL <http://docs.google.com/fileview?id=0BzLQtQ1qAO7uYjYyN2QwNGUtYWE2YS00MDc1LWExYWUtZTliNjNjNmZiZTZj&hl=en>
> and the patch for its feasibility, scalability and performance.
> 
> Many Thanks,
> Soumya KS
> Shubhro Sinha

Hi,

    FYI, a patch to add EDF scheduling to Linux has been already 
proposed in September (see http://lkml.org/lkml/2009/9/22/186), 
discussed on mailing list and presented with a talk at the last 
Real-Time Linux Workshop (RTLWS - 
http://www.osadl.org/Dresden-2009.rtlws11-dresden-2009.0.html).

The patch was initially called "SCHED_EDF". Then, on suggestion of 
Peter, the name has been changed to "SCHED_DEADLINE" and the code moved 
to a public git repository 
(http://gitorious.org/sched_deadline/sched-deadline).

Recently, the patch has been re-submitted to LKML (see 
http://lkml.org/lkml/2009/10/16/161) with many issues fixed. Among them: 
a new behavior of fork(), a new syscall for periodic tasks, flags to 
signal deadline misses and budget overruns. All these changes come from 
the comments and the feedback got on mailing list and at the workshop.

At a first glance, your patch extends sched_param (which may lead to 
compatibility problems with old code), it does not support SMP 
platforms, and it changes the sched_rt code instead of adding a new 
scheduling class.

For the reasons above, I think that we should stick with SCHED_DEADLINE, 
eventually integrating some parts of your code into that project.

Best regards,

              Claudio

^ permalink raw reply	[flat|nested] 10+ messages in thread

[parent not found: <e932a33e0910220738v3c612d68ve902a770ab0928f4@mail.gmail.com>]

* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline  Based Interrupt Handlers
       [not found]   ` <e932a33e0910220738v3c612d68ve902a770ab0928f4@mail.gmail.com>
@ 2009-10-22 14:40     ` Soumya K S
  0 siblings, 0 replies; 10+ messages in thread
From: Soumya K S @ 2009-10-22 14:40 UTC (permalink / raw)
  To: linux-kernel; +Cc: tglx, mingo

Making the patch inline,,
Thanks,
Soumya

diff -Naur linux-2.6.32-rc3/arch/arm/include/asm/signal.h
linux-2.6.32-rc3-drtl/arch/arm/include/asm/signal.h
--- linux-2.6.32-rc3/arch/arm/include/asm/signal.h      2009-10-05
05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/arch/arm/include/asm/signal.h 2009-10-20
10:40:16.000000000 +0530
@@ -70,6 +70,7 @@
 #define SIGRTMIN       32
 #define SIGRTMAX       _NSIG

+#define SIGMISSDEAD    SIGRTMIN + 4
 #define SIGSWI         32

 /*
diff -Naur linux-2.6.32-rc3/include/linux/interrupt.h
linux-2.6.32-rc3-drtl/include/linux/interrupt.h
--- linux-2.6.32-rc3/include/linux/interrupt.h  2009-10-05
05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/include/linux/interrupt.h     2009-10-20
11:55:45.000000000 +0530
@@ -107,6 +107,12 @@
 };

 extern irqreturn_t no_action(int cpl, void *dev_id);
+#ifdef CONFIG_SCHED_EDF
+extern int __must_check
+request_irq_edf(unsigned int irq, irq_handler_t handler,irq_handler_t
thread_fn, unsigned long flags,
+               const char *name, void *dev, struct timespec *ts);
+#endif
+

 #ifdef CONFIG_GENERIC_HARDIRQS
 extern int __must_check
diff -Naur linux-2.6.32-rc3/include/linux/irq.h
linux-2.6.32-rc3-drtl/include/linux/irq.h
--- linux-2.6.32-rc3/include/linux/irq.h        2009-10-05
05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/include/linux/irq.h   2009-10-20
11:51:27.000000000 +0530
@@ -23,6 +23,7 @@
 #include <linux/errno.h>
 #include <linux/topology.h>
 #include <linux/wait.h>
+#include <linux/ktime.h>

 #include <asm/irq.h>
 #include <asm/ptrace.h>
@@ -206,6 +207,8 @@
       struct proc_dir_entry   *dir;
 #endif
       const char              *name;
+       ktime_t                 deadline;
+       unsigned                edf_flag;
 } ____cacheline_internodealigned_in_smp;

 extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
diff -Naur linux-2.6.32-rc3/include/linux/plist.h
linux-2.6.32-rc3-drtl/include/linux/plist.h
--- linux-2.6.32-rc3/include/linux/plist.h      2009-10-05
05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/include/linux/plist.h 2009-10-20
11:00:31.000000000 +0530
@@ -87,6 +87,8 @@

 struct plist_node {
       int                     prio;
+       long long int           deadline;
+       int                     policy;
       struct plist_head       plist;
 };

@@ -142,9 +144,11 @@
 * @node:      &struct plist_node pointer
 * @prio:      initial node priority
 */
-static inline void plist_node_init(struct plist_node *node, int prio)
+static inline void plist_node_init(struct plist_node *node, int prio,
long long int deadline, int policy)
 {
       node->prio = prio;
+       node->deadline = deadline;
+       node->policy = policy;
       plist_head_init(&node->plist, NULL);
 }

diff -Naur linux-2.6.32-rc3/include/linux/sched.h
linux-2.6.32-rc3-drtl/include/linux/sched.h
--- linux-2.6.32-rc3/include/linux/sched.h      2009-10-05
05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/include/linux/sched.h 2009-10-20
11:03:51.000000000 +0530
@@ -36,6 +36,7 @@
 #define SCHED_FIFO             1
 #define SCHED_RR               2
 #define SCHED_BATCH            3
+#define SCHED_EDF              123
 /* SCHED_ISO: reserved but not implemented yet */
 #define SCHED_IDLE             5
 /* Can be ORed in to make sure the process is reverted back to
SCHED_NORMAL on fork */
@@ -43,9 +44,6 @@

 #ifdef __KERNEL__

-struct sched_param {
-       int sched_priority;
-};

 #include <asm/param.h> /* for HZ */

@@ -102,6 +100,11 @@
 struct bts_context;
 struct perf_event_context;

+struct sched_param {
+       int sched_priority;
+       struct timespec deadline;
+};
+
 /*
 * List of flags we want to share for kernel threads,
 * if only because they are not used by them anyway.
@@ -195,6 +198,7 @@
 #define TASK_DEAD              64
 #define TASK_WAKEKILL          128
 #define TASK_WAKING            256
+#define EXIT_MISS_DEADLINE     512

 /* Convenience macros for the sake of set_task_state */
 #define TASK_KILLABLE          (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
@@ -1201,6 +1205,9 @@
       int nr_cpus_allowed;

       struct sched_rt_entity *back;
+#ifdef CONFIG_SCHED_EDF
+       struct rb_node edf_node;
+#endif
 #ifdef CONFIG_RT_GROUP_SCHED
       struct sched_rt_entity  *parent;
       /* rq on which this entity is (to be) queued: */
@@ -1232,6 +1239,11 @@
       const struct sched_class *sched_class;
       struct sched_entity se;
       struct sched_rt_entity rt;
+#ifdef CONFIG_SCHED_EDF
+       ktime_t edf_deadline;
+       ktime_t rt_deadline;
+       struct timespec orig_deadline;
+#endif

 #ifdef CONFIG_PREEMPT_NOTIFIERS
       /* list of struct preempt_notifier: */
@@ -1733,6 +1745,7 @@
 #define PF_EXITING     0x00000004      /* getting shut down */
 #define PF_EXITPIDONE  0x00000008      /* pi exit done on shut down */
 #define PF_VCPU                0x00000010      /* I'm a virtual CPU */
+#define        PF_HARDIRQ      0x00000020      /* hardirq context */
 #define PF_FORKNOEXEC  0x00000040      /* forked but didn't exec */
 #define PF_MCE_PROCESS  0x00000080      /* process policy on mce errors */
 #define PF_SUPERPRIV   0x00000100      /* used super-user privileges */
@@ -1932,7 +1945,7 @@

 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
-extern void rt_mutex_setprio(struct task_struct *p, int prio);
+extern void rt_mutex_setprio(struct task_struct *p, int prio, ktime_t
*deadline);
 extern void rt_mutex_adjust_pi(struct task_struct *p);
 #else
 static inline int rt_mutex_getprio(struct task_struct *p)
diff -Naur linux-2.6.32-rc3/init/Kconfig linux-2.6.32-rc3-drtl/init/Kconfig
--- linux-2.6.32-rc3/init/Kconfig       2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/init/Kconfig  2009-10-20 10:40:16.000000000 +0530
@@ -425,6 +425,12 @@
 #
 config HAVE_UNSTABLE_SCHED_CLOCK
       bool
+
+config SCHED_EDF
+       bool "EDF Scheduler Support"
+       default n
+       depends on !GROUP_SCHED
+       depends on !SMP

 config GROUP_SCHED
       bool "Group CPU scheduler"
diff -Naur linux-2.6.32-rc3/init/main.c linux-2.6.32-rc3-drtl/init/main.c
--- linux-2.6.32-rc3/init/main.c        2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/init/main.c   2009-10-20 10:40:16.000000000 +0530
@@ -101,6 +101,10 @@
 enum system_states system_state __read_mostly;
 EXPORT_SYMBOL(system_state);

+#ifdef CONFIG_SCHED_EDF
+void kthread_deadmiss(void);
+#endif
+
 /*
 * Boot command-line arguments
 */
@@ -428,6 +432,9 @@
       numa_default_policy();
       pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
       kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
+#ifdef CONFIG_SCHED_EDF
+       kernel_thread(kthread_deadmiss, NULL, CLONE_FS | CLONE_FILES);
+#endif
       unlock_kernel();

       /*
diff -Naur linux-2.6.32-rc3/kernel/futex.c linux-2.6.32-rc3-drtl/kernel/futex.c
--- linux-2.6.32-rc3/kernel/futex.c     2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/kernel/futex.c        2009-10-20
11:12:18.000000000 +0530
@@ -1384,7 +1384,7 @@
        */
       prio = min(current->normal_prio, MAX_RT_PRIO);

-       plist_node_init(&q->list, prio);
+       plist_node_init(&q->list, prio, 0, 0);
 #ifdef CONFIG_DEBUG_PI_LIST
       q->list.plist.lock = &hb->lock;
 #endif
diff -Naur linux-2.6.32-rc3/kernel/irq/manage.c
linux-2.6.32-rc3-drtl/kernel/irq/manage.c
--- linux-2.6.32-rc3/kernel/irq/manage.c        2009-10-05
05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/kernel/irq/manage.c   2009-10-20
10:41:06.000000000 +0530
@@ -536,7 +536,16 @@
       struct irq_desc *desc = irq_to_desc(action->irq);
       int wake, oneshot = desc->status & IRQ_ONESHOT;

-       sched_setscheduler(current, SCHED_FIFO, &param);
+       current->flags |= PF_HARDIRQ;
+       if (desc->edf_flag)
+       {
+               param.deadline.tv_sec = desc->deadline.tv.sec;
+               param.deadline.tv_nsec = desc->deadline.tv.nsec;
+               sched_setscheduler(current, SCHED_EDF, &param);
+       }
+       else
+               sched_setscheduler(current, SCHED_FIFO, &param);
+
       current->irqaction = action;

       while (!irq_wait_for_interrupt(action)) {
@@ -1088,3 +1097,18 @@
       return retval;
 }
 EXPORT_SYMBOL(request_threaded_irq);
+
+#ifdef CONFIG_SCHED_EDF
+int request_irq_edf (unsigned int irq, irq_handler_t handler,
irq_handler_t thread_fn,
+                       unsigned long irqflags, const char *devname, void
*dev_id, struct timespec *deadline)
+{
+        struct irq_desc *desc;
+        desc = irq_to_desc(irq);
+        desc->deadline.tv.sec = deadline->tv_sec;
+        desc->deadline.tv.nsec = deadline->tv_nsec;
+        desc->edf_flag = 1;
+        return request_threaded_irq (irq, handler,thread_fn,
irqflags, devname, dev_id);
+}
+EXPORT_SYMBOL(request_irq_edf);
+#endif
+
diff -Naur linux-2.6.32-rc3/kernel/kthread.c
linux-2.6.32-rc3-drtl/kernel/kthread.c
--- linux-2.6.32-rc3/kernel/kthread.c   2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/kernel/kthread.c      2009-10-20
11:54:12.000000000 +0530
@@ -33,6 +33,14 @@
       struct list_head list;
 };

+#ifdef CONFIG_SCHED_EDF
+struct kthread_deadmiss_info {
+       struct completion deadmiss;
+       struct task_struct *k;
+} kthread_dead_info;
+EXPORT_SYMBOL (kthread_dead_info);
+#endif
+
 struct kthread {
       int should_stop;
       struct completion exited;
@@ -247,3 +255,30 @@

       return 0;
 }
+
+#ifdef CONFIG_SCHED_EDF
+void dead_miss_default (void)
+{
+       struct task_struct *tsk = current;
+       set_task_comm(current, "Deadmiss Default");
+       /* Try to stop the runaway thread */
+       kthread_stop (kthread_dead_info.k);
+}
+
+void kthread_deadmiss (void)
+{
+       struct task_struct *tsk = current;
+
+        set_task_comm(tsk, "Deadmiss Thread");
+        ignore_signals(tsk);
+        current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
+
+        while (1)
+        {
+                init_completion(&kthread_dead_info.deadmiss);
+                wait_for_completion (&kthread_dead_info.deadmiss);
+               kthread_run(dead_miss_default,NULL,"Deadmiss Default");
+
+        }
+}
+#endif
diff -Naur linux-2.6.32-rc3/kernel/rtmutex.c
linux-2.6.32-rc3-drtl/kernel/rtmutex.c
--- linux-2.6.32-rc3/kernel/rtmutex.c   2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/kernel/rtmutex.c      2009-10-20
11:13:24.000000000 +0530
@@ -112,6 +112,19 @@
                  task->normal_prio);
 }

+#ifdef CONFIG_SCHED_EDF
+ktime_t rt_mutex_getdeadline(struct task_struct *task)
+{
+
+        if (likely(!task_has_pi_waiters(task))) {
+                return task->rt_deadline;
+        }
+        if (ktime_sub((ktime_t)task_top_pi_waiter(task)->pi_list_entry.deadline,task->rt_deadline).tv64
< 0 )
+                return
(ktime_t)task_top_pi_waiter(task)->pi_list_entry.deadline;
+        else return task->rt_deadline;
+}
+#endif
+
 /*
 * Adjust the priority of a task, after its pi_waiters got modified.
 *
@@ -122,7 +135,20 @@
       int prio = rt_mutex_getprio(task);

       if (task->prio != prio)
-               rt_mutex_setprio(task, prio);
+               rt_mutex_setprio(task, prio, NULL);
+
+#ifdef CONFIG_SCHED_EDF
+       else {
+                if ((task_top_pi_waiter(task)->pi_list_entry.policy
== SCHED_EDF && task->policy == SCHED_EDF)
+                       || (!task_has_pi_waiters(task)) &&
task->policy == SCHED_EDF) {
+                        ktime_t deadline = rt_mutex_getdeadline(task);
+                        if (!ktime_equal (deadline, task->edf_deadline))
+                                rt_mutex_setprio(task,prio,&deadline);
+                        return;
+                }
+        }
+#endif
+
 }

 /*
@@ -424,8 +450,14 @@
       __rt_mutex_adjust_prio(task);
       waiter->task = task;
       waiter->lock = lock;
-       plist_node_init(&waiter->list_entry, task->prio);
-       plist_node_init(&waiter->pi_list_entry, task->prio);
+#ifdef CONFIG_SCHED_EDF
+       plist_node_init(&waiter->list_entry,
current->prio,current->edf_deadline.tv64,current->policy);
+        plist_node_init(&waiter->pi_list_entry,
current->prio,current->edf_deadline.tv64,current->policy);
+#else
+       plist_node_init(&waiter->list_entry, task->prio, 0,0);
+       plist_node_init(&waiter->pi_list_entry, task->prio,0,0);
+#endif
+

       /* Get the top priority waiter on the lock */
       if (rt_mutex_has_waiters(lock))
diff -Naur linux-2.6.32-rc3/kernel/sched.c linux-2.6.32-rc3-drtl/kernel/sched.c
--- linux-2.6.32-rc3/kernel/sched.c     2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/kernel/sched.c        2009-10-20
11:10:33.000000000 +0530
@@ -121,7 +121,7 @@

 static inline int rt_policy(int policy)
 {
-       if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR))
+       if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR || policy ==
SCHED_EDF))
               return 1;
       return 0;
 }
@@ -451,6 +451,9 @@
 struct rt_rq {
       struct rt_prio_array active;
       unsigned long rt_nr_running;
+#ifdef CONFIG_SCHED_EDF
+       unsigned long edf_running;
+#endif
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
       struct {
               int curr; /* highest queued rt task prio */
@@ -479,6 +482,11 @@
       struct task_group *tg;
       struct sched_rt_entity *rt_se;
 #endif
+#ifdef CONFIG_SCHED_EDF
+       struct rb_root          edf_root;
+       struct sched_rt_entity  *edf_next;
+
+#endif
 };

 #ifdef CONFIG_SMP
@@ -1816,6 +1824,9 @@
 #include "sched_stats.h"
 #include "sched_idletask.c"
 #include "sched_fair.c"
+#ifdef CONFIG_SCHED_EDF
+#include "sched_edf.c"
+#endif
 #include "sched_rt.c"
 #ifdef CONFIG_SCHED_DEBUG
 # include "sched_debug.c"
@@ -2560,8 +2571,12 @@
       /* Want to start with kernel preemption disabled. */
       task_thread_info(p)->preempt_count = 1;
 #endif
-       plist_node_init(&p->pushable_tasks, MAX_PRIO);

+#ifdef CONFIG_SCHED_EDF
+       plist_node_init(&p->pushable_tasks, MAX_PRIO, p->edf_deadline.tv64,
p->policy);
+#else
+       plist_node_init(&p->pushable_tasks, MAX_PRIO, 0, 0);
+#endif
       put_cpu();
 }

@@ -5942,7 +5957,7 @@
 *
 * Used by the rt_mutex code to implement priority inheritance logic.
 */
-void rt_mutex_setprio(struct task_struct *p, int prio)
+void rt_mutex_setprio(struct task_struct *p, int prio, ktime_t *deadline)
 {
       unsigned long flags;
       int oldprio, on_rq, running;
@@ -5966,7 +5981,13 @@
               p->sched_class = &rt_sched_class;
       else
               p->sched_class = &fair_sched_class;
-
+
+#ifdef CONFIG_SCHED_EDF
+       if (p->policy == SCHED_EDF && deadline != NULL)
+       {
+               p->edf_deadline = *deadline;
+       }
+#endif
       p->prio = prio;

       if (running)
@@ -6150,6 +6171,7 @@
               break;
       case SCHED_FIFO:
       case SCHED_RR:
+       case SCHED_EDF:
               p->sched_class = &rt_sched_class;
               break;
       }
@@ -6197,7 +6219,7 @@
               reset_on_fork = !!(policy & SCHED_RESET_ON_FORK);
               policy &= ~SCHED_RESET_ON_FORK;

-               if (policy != SCHED_FIFO && policy != SCHED_RR &&
+               if (policy != SCHED_FIFO && policy != SCHED_RR &&
policy != SCHED_EDF &&
                               policy != SCHED_NORMAL && policy !=
SCHED_BATCH &&
                               policy != SCHED_IDLE)
                       return -EINVAL;
@@ -6344,7 +6366,7 @@
 {
       return __sched_setscheduler(p, policy, param, false);
 }
-
+EXPORT_SYMBOL(sched_setscheduler_nocheck);
 static int
 do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 {
@@ -6361,7 +6383,17 @@
       retval = -ESRCH;
       p = find_process_by_pid(pid);
       if (p != NULL)
+       {
+#ifdef CONFIG_SCHED_EDF
+               if (policy == SCHED_EDF)
+               {
+                       p->edf_deadline =
ktime_add(ktime_get(),timespec_to_ktime
(lparam.deadline));
+                       p->orig_deadline = lparam.deadline;
+                       p->rt_deadline = p->edf_deadline;
+               }
+#endif
               retval = sched_setscheduler(p, policy, &lparam);
+       }
       rcu_read_unlock();

       return retval;
@@ -6767,6 +6799,7 @@
       switch (policy) {
       case SCHED_FIFO:
       case SCHED_RR:
+       case SCHED_EDF:
               ret = MAX_USER_RT_PRIO-1;
               break;
       case SCHED_NORMAL:
@@ -6792,6 +6825,7 @@
       switch (policy) {
       case SCHED_FIFO:
       case SCHED_RR:
+       case SCHED_EDF:
               ret = 1;
               break;
       case SCHED_NORMAL:
@@ -9226,7 +9260,11 @@
 {
       struct rt_prio_array *array;
       int i;
-
+#ifdef CONFIG_SCHED_EDF
+       rt_rq->edf_root.rb_node = NULL;
+       rt_rq->edf_running = 0;
+       rt_rq->edf_next = NULL;
+#endif
       array = &rt_rq->active;
       for (i = 0; i < MAX_RT_PRIO; i++) {
               INIT_LIST_HEAD(array->queue + i);
diff -Naur linux-2.6.32-rc3/kernel/sched_edf.c
linux-2.6.32-rc3-drtl/kernel/sched_edf.c
--- linux-2.6.32-rc3/kernel/sched_edf.c 1970-01-01 05:30:00.000000000 +0530
+++ linux-2.6.32-rc3-drtl/kernel/sched_edf.c    2009-10-20
11:25:24.000000000 +0530
@@ -0,0 +1,116 @@
+#define check_bit(node1, node2) ((node1->rb_parent_color ^
node2->rb_parent_color)&2)
+static inline void inc_rt_tasks(struct sched_rt_entity *rt_se, struct
rt_rq *rt_rq);
+static inline void dec_rt_tasks(struct sched_rt_entity *rt_se, struct
rt_rq *rt_rq);
+static int has_equal = 0;
+static struct sched_rt_entity *leftmost = NULL;
+static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se);
+static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se);
+static inline ktime_t edf_se_deadline (struct sched_rt_entity *rt_se)
+{
+       return (rt_task_of(rt_se))->edf_deadline;
+}
+void enqueue_task_edf(struct rq *rq, struct task_struct *p)
+{
+
+       struct rt_rq *rt_rq = &rq->rt;
+       struct rb_node **link = &rt_rq->edf_root.rb_node, *parent=NULL;
+       struct sched_rt_entity *entry;
+       int leftmost_flag= 1, equal = 0;
+       s64 diff;
+       u8 last_bit = 0;
+       /*
+       * Find the right place in the rbtree:
+        */
+       has_equal = 0;
+       if (p->flags & PF_HARDIRQ)
+               p->edf_deadline = ktime_add(ktime_get(),timespec_to_ktime
(p->orig_deadline));
+        while (*link) {
+                parent = *link;
+                entry = rb_entry(parent, struct sched_rt_entity, edf_node);
+                /*
+                 * We dont care about collisions. Nodes with
+                 * the same key stay together.
+                 */
+
+               diff = ktime_sub(p->edf_deadline,edf_se_deadline(entry)).tv64;
+               if (diff < 0) {
+                        link = &parent->rb_left;
+                } else if (diff == 0) {
+                        link = &parent->rb_left;
+                       last_bit = (parent->rb_parent_color & 0x02);
+                       equal = 1;
+               }
+               else {
+                        link = &parent->rb_right;
+                       leftmost_flag = 0;
+                }
+        }
+       rb_link_node (&p->rt.edf_node,parent,link);
+       rb_insert_color(&p->rt.edf_node,&rt_rq->edf_root);
+       if (!equal)
+               last_bit = (parent==NULL)?0x2:~(parent->rb_parent_color & 0x02);
+       p->rt.edf_node.rb_parent_color |= (last_bit & 0x02);
+       if (leftmost_flag)
+       {
+               leftmost = rt_rq->edf_next = &p->rt;
+               if (equal) {
+                       has_equal = 1;
+               }
+       }
+       (rt_rq_of_se(&p->rt))->edf_running++;
+       inc_rt_tasks(&p->rt,rt_rq_of_se(&p->rt));
+}
+
+void dequeue_task_edf(struct rq *rq, struct task_struct *p)
+{
+       struct rb_node *next_node;
+       struct rb_node *prev_node;
+       struct rb_node *assign_node;
+       if (rq->rt.edf_running > 2)
+       {
+               next_node = rb_next(&leftmost->edf_node);
+               if (&p->rt.edf_node == next_node)
+                       next_node = rb_next(next_node);
+               else if (&p->rt == leftmost)
+               {
+                       leftmost = rb_entry (next_node, struct
sched_rt_entity, edf_node);
+                       next_node = rb_next(&leftmost->edf_node);
+               }
+               if (&p->rt == rq->rt.edf_next)
+               {
+                       rq->rt.edf_next =
rb_entry(rb_next(&(rq->rt.edf_next->edf_node)),
struct sched_rt_entity, edf_node);
+                       if (has_equal && (rq->rt.edf_next == NULL ||
check_bit((&(p->rt.edf_node)),(&(rq->rt.edf_next->edf_node)))))
+                               rq->rt.edf_next = leftmost;
+               }
+               has_equal = !check_bit((&leftmost->edf_node), next_node);
+       }
+       else
+       {
+
+               next_node = rb_next (&p->rt.edf_node);
+               prev_node = rb_prev (&p->rt.edf_node);
+               assign_node = (next_node == NULL) ? prev_node : next_node;
+               if (assign_node != NULL)
+                       leftmost = rq->rt.edf_next =
rb_entry(assign_node, struct
sched_rt_entity, edf_node);
+               else
+                       leftmost = rq->rt.edf_next = NULL;
+               has_equal = 0;
+       }
+       (rt_rq_of_se(&p->rt))->edf_running--;
+       dec_rt_tasks(&p->rt, rt_rq_of_se(&p->rt));
+       rb_erase(&p->rt.edf_node, &rq->rt.edf_root);
+}
+
+struct sched_rt_entity *pick_next_task_edf(struct rq *rq)
+{
+       struct sched_rt_entity *retval;
+       struct rb_node *next_node;
+       retval = rq->rt.edf_next;
+       if (has_equal)
+       {
+               next_node = rb_next(&retval->edf_node);
+               rq->rt.edf_next = (next_node == NULL ||
check_bit((&rq->rt.edf_next->edf_node), next_node)) ? leftmost :
+                       rb_entry(next_node, struct sched_rt_entity, edf_node);
+       }
+       return retval;
+}
diff -Naur linux-2.6.32-rc3/kernel/sched_rt.c
linux-2.6.32-rc3-drtl/kernel/sched_rt.c
--- linux-2.6.32-rc3/kernel/sched_rt.c  2009-10-05 05:42:30.000000000 +0530
+++ linux-2.6.32-rc3-drtl/kernel/sched_rt.c     2009-10-20
10:40:16.000000000 +0530
@@ -3,6 +3,14 @@
 * policies)
 */

+#ifdef CONFIG_SCHED_EDF
+struct kthread_deadmiss_info {
+       struct completion deadmiss;
+       struct task_struct *k;
+};
+extern struct kthread_deadmiss_info kthread_dead_info;
+#endif
+
 #ifdef CONFIG_RT_GROUP_SCHED

 #define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
@@ -607,6 +615,7 @@
       if (unlikely((s64)delta_exec < 0))
               delta_exec = 0;

+
       schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));

       curr->se.sum_exec_runtime += delta_exec;
@@ -614,7 +623,6 @@

       curr->se.exec_start = rq->clock;
       cpuacct_charge(curr, delta_exec);
-
       sched_rt_avg_update(rq, delta_exec);

       if (!rt_bandwidth_enabled())
@@ -885,6 +893,11 @@
       if (wakeup)
               rt_se->timeout = 0;

+#ifdef CONFIG_SCHED_EDF
+       if (p->policy == SCHED_EDF)
+               enqueue_task_edf(rq,p);
+       else
+#endif
       enqueue_rt_entity(rt_se);

       if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
@@ -896,7 +909,12 @@
       struct sched_rt_entity *rt_se = &p->rt;

       update_curr_rt(rq);
-       dequeue_rt_entity(rt_se);
+#ifdef CONFIG_SCHED_EDF
+       if (p->policy == SCHED_EDF)
+               dequeue_task_edf(rq,p);
+       else
+#endif
+               dequeue_rt_entity(rt_se);

       dequeue_pushable_task(rq, p);
 }
@@ -924,6 +942,11 @@
       struct sched_rt_entity *rt_se = &p->rt;
       struct rt_rq *rt_rq;

+#ifndef CONFIG_SCHED_EDF
+       if (p->policy == SCHED_EDF)
+               return;
+#endif
+
       for_each_sched_rt_entity(rt_se) {
               rt_rq = rt_rq_of_se(rt_se);
               requeue_rt_entity(rt_rq, rt_se, head);
@@ -1036,10 +1059,22 @@
       int idx;

       idx = sched_find_first_bit(array->bitmap);
+#ifdef CONFIG_SCHED_EDF
+       BUG_ON(!rt_rq->edf_next && idx >= MAX_RT_PRIO);
+#else
       BUG_ON(idx >= MAX_RT_PRIO);
+#endif

-       queue = array->queue + idx;
-       next = list_entry(queue->next, struct sched_rt_entity, run_list);
+#ifdef CONFIG_SCHED_EDF
+       if (!rt_rq->edf_next || rt_se_prio(rt_rq->edf_next) > idx) {
+#endif
+               queue = array->queue + idx;
+               next = list_entry(queue->next, struct sched_rt_entity,
run_list);
+
+#ifdef CONFIG_SCHED_EDF
+       }
+       else next = pick_next_task_edf(rq);
+#endif

       return next;
 }
@@ -1089,11 +1124,30 @@
       return p;
 }

+
 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
       update_curr_rt(rq);
       p->se.exec_start = 0;
-
+#ifdef CONFIG_SCHED_EDF
+       /* Deadline miss handler for run away tasks */
+       if (p->policy == SCHED_EDF && !p->flags&PF_HARDIRQ)  {
+            if (ktime_sub(p->edf_deadline,ktime_get()).tv64 <= 0) {
+               if (p->flags&PF_KTHREAD) {
+                       dequeue_task_edf (rq,p);
+                       kthread_dead_info.k = p;
+                       complete (&kthread_dead_info.deadmiss);
+                       set_task_state(p, TASK_INTERRUPTIBLE);
+               }
+               else {
+                       sigaddset(&p->pending.signal,SIGMISSDEAD);
+                       set_tsk_thread_flag(p, TIF_SIGPENDING);
+                       p->exit_code = EXIT_MISS_DEADLINE;
+               }
+           }
+       }
+#endif
+
       /*
        * The previous task needs to be made eligible for pushing
        * if it is still active






On Wed, Oct 21, 2009 at 9:08 PM, Soumya K S <ssks.mt@gmail.com> wrote:
> Hello All,
>
> We would like to present a patch on Deployment specific Real-Time
> Linux, topic discussed in LinuxCon 2009
> <http://events.linuxfoundation.org/lc09d17>
>
> The developed framework allows user to specify the real-time
> strategies for a specific real-time scenario. User specifies
> configurations like scheduling policy, Deadline-miss Fault-tolerance
> limit, interrupt priorities, etc. Real-time applications use onetime
> gateway to notify kernel that they require real-time response. All
> applications use existing POSIX APIs. DRTL scheduler is time-aware and
> uses EDF as the scheduling policy.
>
> The patch consists of Time aware scheduler having SCHED_EDF as the
> scheduling policy, Deadline based scheduling for Interrupt handlers,
> Deadline Inheritance support for RT-Mutexes.
>
> The patch is for the kernel version 2.6.32-rc3. It has been tested on
> OMAP3530, ATMEL AT91SAM9261 and X86 platforms.
>
> We look forward for support and feedback about
> DRTL <http://docs.google.com/fileview?id=0BzLQtQ1qAO7uYjYyN2QwNGUtYWE2YS00MDc1LWExYWUtZTliNjNjNmZiZTZj&hl=en>
> and the patch for its feasibility, scalability and performance.
>
> Many Thanks,
> Soumya KS
> Shubhro Sinha
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2009-11-10 14:34 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <e932a33e0910210833j1fab6129m23b55101c5978c4a@mail.gmail.com>
2009-10-21 15:38 ` [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers Soumya K S
2009-10-22  7:54   ` Raistlin
2009-10-22 14:42     ` Soumya K S
2009-10-25  8:46       ` Raistlin
2009-10-28 14:15         ` Soumya K S
2009-10-28 16:24           ` Raistlin
2009-11-10 14:03             ` Soumya K S
2009-11-10 14:34             ` Peter Zijlstra
2009-10-22  9:33   ` Claudio Scordino
     [not found]   ` <e932a33e0910220738v3c612d68ve902a770ab0928f4@mail.gmail.com>
2009-10-22 14:40     ` Soumya K S

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox