* [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers [not found] <e932a33e0910210833j1fab6129m23b55101c5978c4a@mail.gmail.com> @ 2009-10-21 15:38 ` Soumya K S 2009-10-22 7:54 ` Raistlin ` (2 more replies) 0 siblings, 3 replies; 10+ messages in thread From: Soumya K S @ 2009-10-21 15:38 UTC (permalink / raw) To: linux-kernel; +Cc: tglx, mingo [-- Attachment #1: Type: text/plain, Size: 1156 bytes --] Hello All, We would like to present a patch on Deployment specific Real-Time Linux, topic discussed in LinuxCon 2009 <http://events.linuxfoundation.org/lc09d17> The developed framework allows the user to specify the real-time strategies for a specific real-time scenario. The user specifies configurations like scheduling policy, Deadline-miss Fault-tolerance limit, interrupt priorities, etc. Real-time applications use a one-time gateway to notify the kernel that they require real-time response. All applications use existing POSIX APIs. DRTL scheduler is time-aware and uses EDF as the scheduling policy. The patch consists of a Time-aware scheduler having SCHED_EDF as the scheduling policy, Deadline based scheduling for Interrupt handlers, and Deadline Inheritance support for RT-Mutexes. The patch is for the kernel version 2.6.32-rc3. It has been tested on OMAP3530, ATMEL AT91SAM9261 and X86 platforms. We look forward to support and feedback about DRTL <http://docs.google.com/fileview?id=0BzLQtQ1qAO7uYjYyN2QwNGUtYWE2YS00MDc1LWExYWUtZTliNjNjNmZiZTZj&hl=en> and the patch regarding its feasibility, scalability and performance. 
Many Thanks, Soumya KS Shubhro Sinha [-- Attachment #2: README --] [-- Type: application/octet-stream, Size: 102 bytes --] Patch for Deployment Specific Real-Time Linux Support Soumya KS and Shubhro Sinha <ssks.mt@gmail.com> [-- Attachment #3: drtl_patch_2010_1 --] [-- Type: application/octet-stream, Size: 22601 bytes --] diff -Naur linux-2.6.32-rc3/arch/arm/include/asm/signal.h linux-2.6.32-rc3-drtl/arch/arm/include/asm/signal.h --- linux-2.6.32-rc3/arch/arm/include/asm/signal.h 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/arch/arm/include/asm/signal.h 2009-10-20 10:40:16.000000000 +0530 @@ -70,6 +70,7 @@ #define SIGRTMIN 32 #define SIGRTMAX _NSIG +#define SIGMISSDEAD SIGRTMIN + 4 #define SIGSWI 32 /* diff -Naur linux-2.6.32-rc3/include/linux/interrupt.h linux-2.6.32-rc3-drtl/include/linux/interrupt.h --- linux-2.6.32-rc3/include/linux/interrupt.h 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/include/linux/interrupt.h 2009-10-20 11:55:45.000000000 +0530 @@ -107,6 +107,12 @@ }; extern irqreturn_t no_action(int cpl, void *dev_id); +#ifdef CONFIG_SCHED_EDF +extern int __must_check +request_irq_edf(unsigned int irq, irq_handler_t handler,irq_handler_t thread_fn, unsigned long flags, + const char *name, void *dev, struct timespec *ts); +#endif + #ifdef CONFIG_GENERIC_HARDIRQS extern int __must_check diff -Naur linux-2.6.32-rc3/include/linux/irq.h linux-2.6.32-rc3-drtl/include/linux/irq.h --- linux-2.6.32-rc3/include/linux/irq.h 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/include/linux/irq.h 2009-10-20 11:51:27.000000000 +0530 @@ -23,6 +23,7 @@ #include <linux/errno.h> #include <linux/topology.h> #include <linux/wait.h> +#include <linux/ktime.h> #include <asm/irq.h> #include <asm/ptrace.h> @@ -206,6 +207,8 @@ struct proc_dir_entry *dir; #endif const char *name; + ktime_t deadline; + unsigned edf_flag; } ____cacheline_internodealigned_in_smp; extern void arch_init_copy_chip_data(struct irq_desc *old_desc, diff -Naur 
linux-2.6.32-rc3/include/linux/plist.h linux-2.6.32-rc3-drtl/include/linux/plist.h --- linux-2.6.32-rc3/include/linux/plist.h 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/include/linux/plist.h 2009-10-20 11:00:31.000000000 +0530 @@ -87,6 +87,8 @@ struct plist_node { int prio; + long long int deadline; + int policy; struct plist_head plist; }; @@ -142,9 +144,11 @@ * @node: &struct plist_node pointer * @prio: initial node priority */ -static inline void plist_node_init(struct plist_node *node, int prio) +static inline void plist_node_init(struct plist_node *node, int prio, long long int deadline, int policy) { node->prio = prio; + node->deadline = deadline; + node->policy = policy; plist_head_init(&node->plist, NULL); } diff -Naur linux-2.6.32-rc3/include/linux/sched.h linux-2.6.32-rc3-drtl/include/linux/sched.h --- linux-2.6.32-rc3/include/linux/sched.h 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/include/linux/sched.h 2009-10-20 11:03:51.000000000 +0530 @@ -36,6 +36,7 @@ #define SCHED_FIFO 1 #define SCHED_RR 2 #define SCHED_BATCH 3 +#define SCHED_EDF 123 /* SCHED_ISO: reserved but not implemented yet */ #define SCHED_IDLE 5 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ @@ -43,9 +44,6 @@ #ifdef __KERNEL__ -struct sched_param { - int sched_priority; -}; #include <asm/param.h> /* for HZ */ @@ -102,6 +100,11 @@ struct bts_context; struct perf_event_context; +struct sched_param { + int sched_priority; + struct timespec deadline; +}; + /* * List of flags we want to share for kernel threads, * if only because they are not used by them anyway. 
@@ -195,6 +198,7 @@ #define TASK_DEAD 64 #define TASK_WAKEKILL 128 #define TASK_WAKING 256 +#define EXIT_MISS_DEADLINE 512 /* Convenience macros for the sake of set_task_state */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) @@ -1201,6 +1205,9 @@ int nr_cpus_allowed; struct sched_rt_entity *back; +#ifdef CONFIG_SCHED_EDF + struct rb_node edf_node; +#endif #ifdef CONFIG_RT_GROUP_SCHED struct sched_rt_entity *parent; /* rq on which this entity is (to be) queued: */ @@ -1232,6 +1239,11 @@ const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; +#ifdef CONFIG_SCHED_EDF + ktime_t edf_deadline; + ktime_t rt_deadline; + struct timespec orig_deadline; +#endif #ifdef CONFIG_PREEMPT_NOTIFIERS /* list of struct preempt_notifier: */ @@ -1733,6 +1745,7 @@ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ +#define PF_HARDIRQ 0x00000020 /* hardirq context */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ #define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ @@ -1932,7 +1945,7 @@ #ifdef CONFIG_RT_MUTEXES extern int rt_mutex_getprio(struct task_struct *p); -extern void rt_mutex_setprio(struct task_struct *p, int prio); +extern void rt_mutex_setprio(struct task_struct *p, int prio, ktime_t *deadline); extern void rt_mutex_adjust_pi(struct task_struct *p); #else static inline int rt_mutex_getprio(struct task_struct *p) diff -Naur linux-2.6.32-rc3/init/Kconfig linux-2.6.32-rc3-drtl/init/Kconfig --- linux-2.6.32-rc3/init/Kconfig 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/init/Kconfig 2009-10-20 10:40:16.000000000 +0530 @@ -425,6 +425,12 @@ # config HAVE_UNSTABLE_SCHED_CLOCK bool + +config SCHED_EDF + bool "EDF Scheduler Support" + default n + depends on !GROUP_SCHED + depends on !SMP config GROUP_SCHED bool 
"Group CPU scheduler" diff -Naur linux-2.6.32-rc3/init/main.c linux-2.6.32-rc3-drtl/init/main.c --- linux-2.6.32-rc3/init/main.c 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/init/main.c 2009-10-20 10:40:16.000000000 +0530 @@ -101,6 +101,10 @@ enum system_states system_state __read_mostly; EXPORT_SYMBOL(system_state); +#ifdef CONFIG_SCHED_EDF +void kthread_deadmiss(void); +#endif + /* * Boot command-line arguments */ @@ -428,6 +432,9 @@ numa_default_policy(); pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); +#ifdef CONFIG_SCHED_EDF + kernel_thread(kthread_deadmiss, NULL, CLONE_FS | CLONE_FILES); +#endif unlock_kernel(); /* diff -Naur linux-2.6.32-rc3/kernel/futex.c linux-2.6.32-rc3-drtl/kernel/futex.c --- linux-2.6.32-rc3/kernel/futex.c 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/kernel/futex.c 2009-10-20 11:12:18.000000000 +0530 @@ -1384,7 +1384,7 @@ */ prio = min(current->normal_prio, MAX_RT_PRIO); - plist_node_init(&q->list, prio); + plist_node_init(&q->list, prio, 0, 0); #ifdef CONFIG_DEBUG_PI_LIST q->list.plist.lock = &hb->lock; #endif diff -Naur linux-2.6.32-rc3/kernel/irq/manage.c linux-2.6.32-rc3-drtl/kernel/irq/manage.c --- linux-2.6.32-rc3/kernel/irq/manage.c 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/kernel/irq/manage.c 2009-10-20 10:41:06.000000000 +0530 @@ -536,7 +536,16 @@ struct irq_desc *desc = irq_to_desc(action->irq); int wake, oneshot = desc->status & IRQ_ONESHOT; - sched_setscheduler(current, SCHED_FIFO, ¶m); + current->flags |= PF_HARDIRQ; + if (desc->edf_flag) + { + param.deadline.tv_sec = desc->deadline.tv.sec; + param.deadline.tv_nsec = desc->deadline.tv.nsec; + sched_setscheduler(current, SCHED_EDF, ¶m); + } + else + sched_setscheduler(current, SCHED_FIFO, ¶m); + current->irqaction = action; while (!irq_wait_for_interrupt(action)) { @@ -1088,3 +1097,18 @@ return retval; } EXPORT_SYMBOL(request_threaded_irq); + +#ifdef 
CONFIG_SCHED_EDF +int request_irq_edf (unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn, + unsigned long irqflags, const char *devname, void *dev_id, struct timespec *deadline) +{ + struct irq_desc *desc; + desc = irq_to_desc(irq); + desc->deadline.tv.sec = deadline->tv_sec; + desc->deadline.tv.nsec = deadline->tv_nsec; + desc->edf_flag = 1; + return request_threaded_irq (irq, handler,thread_fn, irqflags, devname, dev_id); +} +EXPORT_SYMBOL(request_irq_edf); +#endif + diff -Naur linux-2.6.32-rc3/kernel/kthread.c linux-2.6.32-rc3-drtl/kernel/kthread.c --- linux-2.6.32-rc3/kernel/kthread.c 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/kernel/kthread.c 2009-10-20 11:54:12.000000000 +0530 @@ -33,6 +33,14 @@ struct list_head list; }; +#ifdef CONFIG_SCHED_EDF +struct kthread_deadmiss_info { + struct completion deadmiss; + struct task_struct *k; +} kthread_dead_info; +EXPORT_SYMBOL (kthread_dead_info); +#endif + struct kthread { int should_stop; struct completion exited; @@ -247,3 +255,30 @@ return 0; } + +#ifdef CONFIG_SCHED_EDF +void dead_miss_default (void) +{ + struct task_struct *tsk = current; + set_task_comm(current, "Deadmiss Default"); + /* Try to stop the runaway thread */ + kthread_stop (kthread_dead_info.k); +} + +void kthread_deadmiss (void) +{ + struct task_struct *tsk = current; + + set_task_comm(tsk, "Deadmiss Thread"); + ignore_signals(tsk); + current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; + + while (1) + { + init_completion(&kthread_dead_info.deadmiss); + wait_for_completion (&kthread_dead_info.deadmiss); + kthread_run(dead_miss_default,NULL,"Deadmiss Default"); + + } +} +#endif diff -Naur linux-2.6.32-rc3/kernel/rtmutex.c linux-2.6.32-rc3-drtl/kernel/rtmutex.c --- linux-2.6.32-rc3/kernel/rtmutex.c 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/kernel/rtmutex.c 2009-10-20 11:13:24.000000000 +0530 @@ -112,6 +112,19 @@ task->normal_prio); } +#ifdef CONFIG_SCHED_EDF +ktime_t rt_mutex_getdeadline(struct 
task_struct *task) +{ + + if (likely(!task_has_pi_waiters(task))) { + return task->rt_deadline; + } + if (ktime_sub((ktime_t)task_top_pi_waiter(task)->pi_list_entry.deadline,task->rt_deadline).tv64 < 0 ) + return (ktime_t)task_top_pi_waiter(task)->pi_list_entry.deadline; + else return task->rt_deadline; +} +#endif + /* * Adjust the priority of a task, after its pi_waiters got modified. * @@ -122,7 +135,20 @@ int prio = rt_mutex_getprio(task); if (task->prio != prio) - rt_mutex_setprio(task, prio); + rt_mutex_setprio(task, prio, NULL); + +#ifdef CONFIG_SCHED_EDF + else { + if ((task_top_pi_waiter(task)->pi_list_entry.policy == SCHED_EDF && task->policy == SCHED_EDF) + || (!task_has_pi_waiters(task)) && task->policy == SCHED_EDF) { + ktime_t deadline = rt_mutex_getdeadline(task); + if (!ktime_equal (deadline, task->edf_deadline)) + rt_mutex_setprio(task,prio,&deadline); + return; + } + } +#endif + } /* @@ -424,8 +450,14 @@ __rt_mutex_adjust_prio(task); waiter->task = task; waiter->lock = lock; - plist_node_init(&waiter->list_entry, task->prio); - plist_node_init(&waiter->pi_list_entry, task->prio); +#ifdef CONFIG_SCHED_EDF + plist_node_init(&waiter->list_entry, current->prio,current->edf_deadline.tv64,current->policy); + plist_node_init(&waiter->pi_list_entry, current->prio,current->edf_deadline.tv64,current->policy); +#else + plist_node_init(&waiter->list_entry, task->prio, 0,0); + plist_node_init(&waiter->pi_list_entry, task->prio,0,0); +#endif + /* Get the top priority waiter on the lock */ if (rt_mutex_has_waiters(lock)) diff -Naur linux-2.6.32-rc3/kernel/sched.c linux-2.6.32-rc3-drtl/kernel/sched.c --- linux-2.6.32-rc3/kernel/sched.c 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/kernel/sched.c 2009-10-20 11:10:33.000000000 +0530 @@ -121,7 +121,7 @@ static inline int rt_policy(int policy) { - if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) + if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR || policy == SCHED_EDF)) return 1; return 
0; } @@ -451,6 +451,9 @@ struct rt_rq { struct rt_prio_array active; unsigned long rt_nr_running; +#ifdef CONFIG_SCHED_EDF + unsigned long edf_running; +#endif #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED struct { int curr; /* highest queued rt task prio */ @@ -479,6 +482,11 @@ struct task_group *tg; struct sched_rt_entity *rt_se; #endif +#ifdef CONFIG_SCHED_EDF + struct rb_root edf_root; + struct sched_rt_entity *edf_next; + +#endif }; #ifdef CONFIG_SMP @@ -1816,6 +1824,9 @@ #include "sched_stats.h" #include "sched_idletask.c" #include "sched_fair.c" +#ifdef CONFIG_SCHED_EDF +#include "sched_edf.c" +#endif #include "sched_rt.c" #ifdef CONFIG_SCHED_DEBUG # include "sched_debug.c" @@ -2560,8 +2571,12 @@ /* Want to start with kernel preemption disabled. */ task_thread_info(p)->preempt_count = 1; #endif - plist_node_init(&p->pushable_tasks, MAX_PRIO); +#ifdef CONFIG_SCHED_EDF + plist_node_init(&p->pushable_tasks, MAX_PRIO, p->edf_deadline.tv64, p->policy); +#else + plist_node_init(&p->pushable_tasks, MAX_PRIO, 0, 0); +#endif put_cpu(); } @@ -5942,7 +5957,7 @@ * * Used by the rt_mutex code to implement priority inheritance logic. 
*/ -void rt_mutex_setprio(struct task_struct *p, int prio) +void rt_mutex_setprio(struct task_struct *p, int prio, ktime_t *deadline) { unsigned long flags; int oldprio, on_rq, running; @@ -5966,7 +5981,13 @@ p->sched_class = &rt_sched_class; else p->sched_class = &fair_sched_class; - + +#ifdef CONFIG_SCHED_EDF + if (p->policy == SCHED_EDF && deadline != NULL) + { + p->edf_deadline = *deadline; + } +#endif p->prio = prio; if (running) @@ -6150,6 +6171,7 @@ break; case SCHED_FIFO: case SCHED_RR: + case SCHED_EDF: p->sched_class = &rt_sched_class; break; } @@ -6197,7 +6219,7 @@ reset_on_fork = !!(policy & SCHED_RESET_ON_FORK); policy &= ~SCHED_RESET_ON_FORK; - if (policy != SCHED_FIFO && policy != SCHED_RR && + if (policy != SCHED_FIFO && policy != SCHED_RR && policy != SCHED_EDF && policy != SCHED_NORMAL && policy != SCHED_BATCH && policy != SCHED_IDLE) return -EINVAL; @@ -6344,7 +6366,7 @@ { return __sched_setscheduler(p, policy, param, false); } - +EXPORT_SYMBOL(sched_setscheduler_nocheck); static int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) { @@ -6361,7 +6383,17 @@ retval = -ESRCH; p = find_process_by_pid(pid); if (p != NULL) + { +#ifdef CONFIG_SCHED_EDF + if (policy == SCHED_EDF) + { + p->edf_deadline = ktime_add(ktime_get(),timespec_to_ktime (lparam.deadline)); + p->orig_deadline = lparam.deadline; + p->rt_deadline = p->edf_deadline; + } +#endif retval = sched_setscheduler(p, policy, &lparam); + } rcu_read_unlock(); return retval; @@ -6767,6 +6799,7 @@ switch (policy) { case SCHED_FIFO: case SCHED_RR: + case SCHED_EDF: ret = MAX_USER_RT_PRIO-1; break; case SCHED_NORMAL: @@ -6792,6 +6825,7 @@ switch (policy) { case SCHED_FIFO: case SCHED_RR: + case SCHED_EDF: ret = 1; break; case SCHED_NORMAL: @@ -9226,7 +9260,11 @@ { struct rt_prio_array *array; int i; - +#ifdef CONFIG_SCHED_EDF + rt_rq->edf_root.rb_node = NULL; + rt_rq->edf_running = 0; + rt_rq->edf_next = NULL; +#endif array = &rt_rq->active; for (i = 0; i < MAX_RT_PRIO; 
i++) { INIT_LIST_HEAD(array->queue + i); diff -Naur linux-2.6.32-rc3/kernel/sched_edf.c linux-2.6.32-rc3-drtl/kernel/sched_edf.c --- linux-2.6.32-rc3/kernel/sched_edf.c 1970-01-01 05:30:00.000000000 +0530 +++ linux-2.6.32-rc3-drtl/kernel/sched_edf.c 2009-10-20 11:25:24.000000000 +0530 @@ -0,0 +1,116 @@ +#define check_bit(node1, node2) ((node1->rb_parent_color ^ node2->rb_parent_color)&2) +static inline void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq); +static inline void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq); +static int has_equal = 0; +static struct sched_rt_entity *leftmost = NULL; +static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se); +static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se); +static inline ktime_t edf_se_deadline (struct sched_rt_entity *rt_se) +{ + return (rt_task_of(rt_se))->edf_deadline; +} +void enqueue_task_edf(struct rq *rq, struct task_struct *p) +{ + + struct rt_rq *rt_rq = &rq->rt; + struct rb_node **link = &rt_rq->edf_root.rb_node, *parent=NULL; + struct sched_rt_entity *entry; + int leftmost_flag= 1, equal = 0; + s64 diff; + u8 last_bit = 0; + /* + * Find the right place in the rbtree: + */ + has_equal = 0; + if (p->flags & PF_HARDIRQ) + p->edf_deadline = ktime_add(ktime_get(),timespec_to_ktime (p->orig_deadline)); + while (*link) { + parent = *link; + entry = rb_entry(parent, struct sched_rt_entity, edf_node); + /* + * We dont care about collisions. Nodes with + * the same key stay together. 
+ */ + + diff = ktime_sub(p->edf_deadline,edf_se_deadline(entry)).tv64; + if (diff < 0) { + link = &parent->rb_left; + } else if (diff == 0) { + link = &parent->rb_left; + last_bit = (parent->rb_parent_color & 0x02); + equal = 1; + } + else { + link = &parent->rb_right; + leftmost_flag = 0; + } + } + rb_link_node (&p->rt.edf_node,parent,link); + rb_insert_color(&p->rt.edf_node,&rt_rq->edf_root); + if (!equal) + last_bit = (parent==NULL)?0x2:~(parent->rb_parent_color & 0x02); + p->rt.edf_node.rb_parent_color |= (last_bit & 0x02); + if (leftmost_flag) + { + leftmost = rt_rq->edf_next = &p->rt; + if (equal) { + has_equal = 1; + } + } + (rt_rq_of_se(&p->rt))->edf_running++; + inc_rt_tasks(&p->rt,rt_rq_of_se(&p->rt)); +} + +void dequeue_task_edf(struct rq *rq, struct task_struct *p) +{ + struct rb_node *next_node; + struct rb_node *prev_node; + struct rb_node *assign_node; + if (rq->rt.edf_running > 2) + { + next_node = rb_next(&leftmost->edf_node); + if (&p->rt.edf_node == next_node) + next_node = rb_next(next_node); + else if (&p->rt == leftmost) + { + leftmost = rb_entry (next_node, struct sched_rt_entity, edf_node); + next_node = rb_next(&leftmost->edf_node); + } + if (&p->rt == rq->rt.edf_next) + { + rq->rt.edf_next = rb_entry(rb_next(&(rq->rt.edf_next->edf_node)), struct sched_rt_entity, edf_node); + if (has_equal && (rq->rt.edf_next == NULL || check_bit((&(p->rt.edf_node)),(&(rq->rt.edf_next->edf_node))))) + rq->rt.edf_next = leftmost; + } + has_equal = !check_bit((&leftmost->edf_node), next_node); + } + else + { + + next_node = rb_next (&p->rt.edf_node); + prev_node = rb_prev (&p->rt.edf_node); + assign_node = (next_node == NULL) ? 
prev_node : next_node; + if (assign_node != NULL) + leftmost = rq->rt.edf_next = rb_entry(assign_node, struct sched_rt_entity, edf_node); + else + leftmost = rq->rt.edf_next = NULL; + has_equal = 0; + } + (rt_rq_of_se(&p->rt))->edf_running--; + dec_rt_tasks(&p->rt, rt_rq_of_se(&p->rt)); + rb_erase(&p->rt.edf_node, &rq->rt.edf_root); +} + +struct sched_rt_entity *pick_next_task_edf(struct rq *rq) +{ + struct sched_rt_entity *retval; + struct rb_node *next_node; + retval = rq->rt.edf_next; + if (has_equal) + { + next_node = rb_next(&retval->edf_node); + rq->rt.edf_next = (next_node == NULL || check_bit((&rq->rt.edf_next->edf_node), next_node)) ? leftmost : + rb_entry(next_node, struct sched_rt_entity, edf_node); + } + return retval; +} diff -Naur linux-2.6.32-rc3/kernel/sched_rt.c linux-2.6.32-rc3-drtl/kernel/sched_rt.c --- linux-2.6.32-rc3/kernel/sched_rt.c 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/kernel/sched_rt.c 2009-10-20 10:40:16.000000000 +0530 @@ -3,6 +3,14 @@ * policies) */ +#ifdef CONFIG_SCHED_EDF +struct kthread_deadmiss_info { + struct completion deadmiss; + struct task_struct *k; +}; +extern struct kthread_deadmiss_info kthread_dead_info; +#endif + #ifdef CONFIG_RT_GROUP_SCHED #define rt_entity_is_task(rt_se) (!(rt_se)->my_q) @@ -607,6 +615,7 @@ if (unlikely((s64)delta_exec < 0)) delta_exec = 0; + schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); curr->se.sum_exec_runtime += delta_exec; @@ -614,7 +623,6 @@ curr->se.exec_start = rq->clock; cpuacct_charge(curr, delta_exec); - sched_rt_avg_update(rq, delta_exec); if (!rt_bandwidth_enabled()) @@ -885,6 +893,11 @@ if (wakeup) rt_se->timeout = 0; +#ifdef CONFIG_SCHED_EDF + if (p->policy == SCHED_EDF) + enqueue_task_edf(rq,p); + else +#endif enqueue_rt_entity(rt_se); if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) @@ -896,7 +909,12 @@ struct sched_rt_entity *rt_se = &p->rt; update_curr_rt(rq); - dequeue_rt_entity(rt_se); +#ifdef CONFIG_SCHED_EDF + if (p->policy == 
SCHED_EDF) + dequeue_task_edf(rq,p); + else +#endif + dequeue_rt_entity(rt_se); dequeue_pushable_task(rq, p); } @@ -924,6 +942,11 @@ struct sched_rt_entity *rt_se = &p->rt; struct rt_rq *rt_rq; +#ifndef CONFIG_SCHED_EDF + if (p->policy == SCHED_EDF) + return; +#endif + for_each_sched_rt_entity(rt_se) { rt_rq = rt_rq_of_se(rt_se); requeue_rt_entity(rt_rq, rt_se, head); @@ -1036,10 +1059,22 @@ int idx; idx = sched_find_first_bit(array->bitmap); +#ifdef CONFIG_SCHED_EDF + BUG_ON(!rt_rq->edf_next && idx >= MAX_RT_PRIO); +#else BUG_ON(idx >= MAX_RT_PRIO); +#endif - queue = array->queue + idx; - next = list_entry(queue->next, struct sched_rt_entity, run_list); +#ifdef CONFIG_SCHED_EDF + if (!rt_rq->edf_next || rt_se_prio(rt_rq->edf_next) > idx) { +#endif + queue = array->queue + idx; + next = list_entry(queue->next, struct sched_rt_entity, run_list); + +#ifdef CONFIG_SCHED_EDF + } + else next = pick_next_task_edf(rq); +#endif return next; } @@ -1089,11 +1124,30 @@ return p; } + static void put_prev_task_rt(struct rq *rq, struct task_struct *p) { update_curr_rt(rq); p->se.exec_start = 0; - +#ifdef CONFIG_SCHED_EDF + /* Deadline miss handler for run away tasks */ + if (p->policy == SCHED_EDF && !p->flags&PF_HARDIRQ) { + if (ktime_sub(p->edf_deadline,ktime_get()).tv64 <= 0) { + if (p->flags&PF_KTHREAD) { + dequeue_task_edf (rq,p); + kthread_dead_info.k = p; + complete (&kthread_dead_info.deadmiss); + set_task_state(p, TASK_INTERRUPTIBLE); + } + else { + sigaddset(&p->pending.signal,SIGMISSDEAD); + set_tsk_thread_flag(p, TIF_SIGPENDING); + p->exit_code = EXIT_MISS_DEADLINE; + } + } + } +#endif + /* * The previous task needs to be made eligible for pushing * if it is still active ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers 2009-10-21 15:38 ` [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers Soumya K S @ 2009-10-22 7:54 ` Raistlin 2009-10-22 14:42 ` Soumya K S 2009-10-22 9:33 ` Claudio Scordino [not found] ` <e932a33e0910220738v3c612d68ve902a770ab0928f4@mail.gmail.com> 2 siblings, 1 reply; 10+ messages in thread From: Raistlin @ 2009-10-22 7:54 UTC (permalink / raw) To: Soumya K S Cc: linux-kernel, tglx, mingo, Dhaval Giani, Peter Zijlstra, Thomas Gleixner, Claudio Scordino, michael trimarchi [-- Attachment #1: Type: text/plain, Size: 3053 bytes --] On Wed, 2009-10-21 at 21:08 +0530, Soumya K S wrote: > Hello All, > Hi, I'm Dario, from Pisa (Italy), and I'm working on EDF scheduling for the Linux kernel as you do (don't know if you've seen the threads and the patches, all info here http://gitorious.org/sched_deadline/pages/Home). > The developed framework allows user to specify the real-time > strategies for a specific real-time scenario. User specifies > configurations like scheduling policy, Deadline-miss Fault-tolerance > limit, interrupt priorities, etc. Real-time applications use onetime > gateway to notify kernel that they require real-time response. All > applications use existing POSIX APIs. DRTL scheduler is time-aware and > uses EDF as the scheduling policy. > Nice, from here, it seemed we were working on very similar things, and I was wondering if we could somehow collaborate... :-) Then I looked at the code and I saw our two designs are quite different, e.g., you don't constraint execution times, don't run any admission test to avoid oversubscription and you stop deadline missing tasks... Am I right? Even from the implementation point of view, I see you didn't used a new scheduling class. However, I think there still would be room for collaboration, if you are interested in... 
> The patch consists of Time aware scheduler having SCHED_EDF as the > scheduling policy, Deadline based scheduling for Interrupt handlers, > Deadline Inheritance support for RT-Mutexes. > I was very curious on how you dealt with deadline inheritance in SMPs, than I saw this in your patch: diff -Naur linux-2.6.32-rc3/init/Kconfig linux-2.6.32-rc3-drtl/init/Kconfig +config SCHED_EDF + bool "EDF Scheduler Support" + default n + depends on !GROUP_SCHED + depends on !SMP :-P I'm right in the opposite situation, I've got SMP (partitioned for now, but we're working on migrations) and also CGROUPS support, but we are still wondering how deadline (or something more sophisticated, like bandwidth) inheritance could work in such a case... Again, I think I see collaboration possibilities, again, if you're interested in... > The patch is for the kernel version 2.6.32-rc3. It has been tested on > OMAP3530, ATMEL AT91SAM9261 and X86 platforms. > Ok... Are you also targeting (or plan to) preempt-rt kernels? > We look forward for support and feedback about > DRTL <http://docs.google.com/fileview?id=0BzLQtQ1qAO7uYjYyN2QwNGUtYWE2YS00MDc1LWExYWUtZTliNjNjNmZiZTZj&hl=en> > and the patch for its feasibility, scalability and performance. > Do you already have any numbers or testcase? I have some (well, a few!) of them... I'll try to find the time to give it a try to your patch with them... Regards, Dario -- <<This happens because I choose it to happen!>> (Raistlin Majere) ---------------------------------------------------------------------- Dario Faggioli, ReTiS Lab, Scuola Superiore Sant'Anna, Pisa (Italy) http://blog.linux.it/raistlin / raistlin@ekiga.net / dario.faggioli@jabber.org [-- Attachment #2: This is a digitally signed message part --] [-- Type: application/pgp-signature, Size: 197 bytes --] ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers 2009-10-22 7:54 ` Raistlin @ 2009-10-22 14:42 ` Soumya K S 2009-10-25 8:46 ` Raistlin 0 siblings, 1 reply; 10+ messages in thread From: Soumya K S @ 2009-10-22 14:42 UTC (permalink / raw) To: Raistlin Cc: linux-kernel, mingo, Dhaval Giani, Peter Zijlstra, Thomas Gleixner, Claudio Scordino, michael trimarchi On Thu, Oct 22, 2009 at 1:24 PM, Raistlin <raistlin@linux.it> wrote: > On Wed, 2009-10-21 at 21:08 +0530, Soumya K S wrote: >> Hello All, >> > Hi, > > I'm Dario, from Pisa (Italy), and I'm working on EDF scheduling for the > Linux kernel as you do (don't know if you've seen the threads and the > patches, all info here http://gitorious.org/sched_deadline/pages/Home). > Hi Dario, we needed a deadline based scheduler for DRTL and we zeroed in upon SCHED_EDF. We see that the threads here are fairly new too :-P >> The developed framework allows user to specify the real-time >> strategies for a specific real-time scenario. User specifies >> configurations like scheduling policy, Deadline-miss Fault-tolerance >> limit, interrupt priorities, etc. Real-time applications use onetime >> gateway to notify kernel that they require real-time response. All >> applications use existing POSIX APIs. DRTL scheduler is time-aware and >> uses EDF as the scheduling policy. >> > Nice, from here, it seemed we were working on very similar things, and I > was wondering if we could somehow collaborate... :-) > Sure! That would be good :) > Then I looked at the code and I saw our two designs are quite different, > e.g., you don't constraint execution times, don't run any admission test > to avoid oversubscription and you stop deadline missing tasks... Am I > right? > Even from the implementation point of view, I see you didn't used a new > scheduling class. 
> A simple system where there a few real-time tasks and a few non-real time tasks, the timelines can be architected out for each real-time task in the system. In such a case, given the RT bandwidth in the system, the task with the lowest deadline gets to be scheduled first till it is there in the system. In short, for such simple systems, we shifted the burden of admission control to the architect and kept close to the existing code. > However, I think there still would be room for collaboration, if you are > interested in... > >> The patch consists of Time aware scheduler having SCHED_EDF as the >> scheduling policy, Deadline based scheduling for Interrupt handlers, >> Deadline Inheritance support for RT-Mutexes. >> > I was very curious on how you dealt with deadline inheritance in SMPs, > than I saw this in your patch: > > diff -Naur linux-2.6.32-rc3/init/Kconfig linux-2.6.32-rc3-drtl/init/Kconfig > +config SCHED_EDF > + bool "EDF Scheduler Support" > + default n > + depends on !GROUP_SCHED > + depends on !SMP > > :-P > > I'm right in the opposite situation, I've got SMP (partitioned for now, > but we're working on migrations) and also CGROUPS support, but we are > still wondering how deadline (or something more sophisticated, like > bandwidth) inheritance could work in such a case... > That's right, we are still working on SMP and hope there are no scalability issues in this patch w.r.t SMP. > Again, I think I see collaboration possibilities, again, if you're > interested in... > Definitely! >> The patch is for the kernel version 2.6.32-rc3. It has been tested on >> OMAP3530, ATMEL AT91SAM9261 and X86 platforms. >> > Ok... Are you also targeting (or plan to) preempt-rt kernels? > Yes, We do have the patches for preempt-rt kernel. >> We look forward for support and feedback about >> DRTL <http://docs.google.com/fileview?id=0BzLQtQ1qAO7uYjYyN2QwNGUtYWE2YS00MDc1LWExYWUtZTliNjNjNmZiZTZj&hl=en> >> and the patch for its feasibility, scalability and performance. 
>> > Do you already have any numbers or testcase? I have some (well, a few!) > of them... I'll try to find the time to give it a try to your patch with > them... > We have tested Co-operative context switch time, Pre-emptive context switch time and Interrupt Latency, all of them are of ~130us for OMAP3530. Regards, Soumya Shubhro ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers 2009-10-22 14:42 ` Soumya K S @ 2009-10-25 8:46 ` Raistlin 2009-10-28 14:15 ` Soumya K S 0 siblings, 1 reply; 10+ messages in thread From: Raistlin @ 2009-10-25 8:46 UTC (permalink / raw) To: Soumya K S Cc: linux-kernel, mingo, Dhaval Giani, Peter Zijlstra, Thomas Gleixner, Claudio Scordino, michael trimarchi, Juri Lelli [-- Attachment #1: Type: text/plain, Size: 5483 bytes --] On Thu, 2009-10-22 at 20:12 +0530, Soumya K S wrote: > Hi Dario, Hi again, > we needed a deadline based scheduler for DRTL and we zeroed > in upon SCHED_EDF. We see that the threads here are fairly new too :-P > Yes, we are not far from the starting point as well... :-) > > Nice, from here, it seemed we were working on very similar things, and I > > was wondering if we could somehow collaborate... :-) > > > Sure! That would be good :) > So... I looked at the code and, even if the aims of our two projects are quite the same, our implementations are very different. The main difference is the bandwidth reservation thing. I strongly think that, on a system like Linux, it should be very important to have --at least as a possibility-- the following features: - tasks can request a guaranteed runtime over some time interval (bandwidth), - admission test should guarantee no oversubscription - bandwidth enforcing must avoid reciprocal task interferences. Maybe we can make the second and third configurable/optional (already thought about that, and it should be quite easy), but they need to be there, at least to avoid extending the interface again when we realize they'll be needed! :-P I don't know how much you, and other people here, are familiar with such an idea... We implemented it using one of the many existing algorithms. I can give you pointers to papers and other implementations of it if interested. 
To keep it short and practical, you can think at it as something similar to what MacOS-X (and I think Solaris) already have --and actively use, e.g., for the Jack Audio Connection Kit-- and call THREAD_TIME_CONSTRAINT (very poor docs, I think, but it gives the big picture): http://developer.apple.com/mac/library/documentation/Darwin/Conceptual/KernelProgramming/scheduler/scheduler.html#//apple_ref/doc/uid/TP30000905-CH211-BEHJDFCA Moreover, as Peter pointed out, coming out with an interface which is too much tightly tied to EDF, or to any other specific algorithm, wouldn't be the best idea. What would happen if somewhere in the future we decide to change from the EDF algorithm to some other deadline based one? That's why we changed the name and the interface from _EDF/_edf (yep, it has been our first choice too! :-P) to _DEADLINE/_deadline, and that's why I think we should continue striving for even more interface-algorithm independence. Obviously, also SMP has to be there! :-P We have it in fully partitioned right now, but I'll become global (with support for cpu-affinity) in very short time I hope (Juri, from ReTiS Lab in Pisa is already working on it). For rt-mutexes/deadline inheritance, the big issue is that, it works if you only have EDF, but with a "bandwidth aware scheduler", you need something more, which is why we don't have it yet... However, I think it could be a nice starting point. Finally, I like the idea of deadline IRQ handling and I think it would be worth to mind it some more. :-) > > Even from the implementation point of view, I see you didn't used a new > > scheduling class. > > > A simple system where there a few real-time tasks and a few non-real > time tasks, the timelines can be architected out for each real-time > task in the system. In such a case, given the RT bandwidth in the > system, the task with the lowest deadline gets to be scheduled first > till it is there in the system. 
> In short, for such simple systems, we shifted the burden of admission > control to the architect and kept close to the existing code. > Well, I kind of agree on both, _iff_ your target is _only_ such small embedded systems. However, my very humble opinion is that, on Linux, you need something more general and, since we are talking about real-time, as much predictable and analyzable as you can get... And I think a separate scheduling class would be better suited for this. What do you think? > > I'm right in the opposite situation, I've got SMP (partitioned for now, > but we're working on migrations) and also CGROUPS support, but we are > still wondering how deadline (or something more sophisticated, like > bandwidth) inheritance could work in such a case... > > That's right, we are still working on SMP and hope there are no > scalability issues in this patch w.r.t SMP. > Well, I don't know. I guess achieving something similar to what we already have now (partitioned SMP) should not be impossible even with your approach... But if you want something different, such as global (EDF) scheduling, where task can migrate among CPUs according to their affinity, would be a major headache!! :-O > > Do you already have any numbers or testcase? I have some (well, a few!) > of them... I'll try to find the time to give it a try to your patch with > them... > > We have tested Co-operative context switch time, Pre-emptive context > switch time and Interrupt Latency, all of them are of ~130us for > OMAP3530. > Mmm... I'm not sure I see why and how your patch should affect context > switches duration... However, do you have the testcases for such tests? 
Regards, Dario -- <<This happens because I choose it to happen!>> (Raistlin Majere) ---------------------------------------------------------------------- Dario Faggioli, ReTiS Lab, Scuola Superiore Sant'Anna, Pisa (Italy) http://blog.linux.it/raistlin / raistlin@ekiga.net / dario.faggioli@jabber.org [-- Attachment #2: This is a digitally signed message part --] [-- Type: application/pgp-signature, Size: 197 bytes --] ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers 2009-10-25 8:46 ` Raistlin @ 2009-10-28 14:15 ` Soumya K S 2009-10-28 16:24 ` Raistlin 0 siblings, 1 reply; 10+ messages in thread From: Soumya K S @ 2009-10-28 14:15 UTC (permalink / raw) To: Raistlin Cc: linux-kernel, mingo, Dhaval Giani, Peter Zijlstra, Thomas Gleixner, Claudio Scordino, michael trimarchi, Juri Lelli On Sun, Oct 25, 2009 at 2:16 PM, Raistlin <raistlin@linux.it> wrote: > On Thu, 2009-10-22 at 20:12 +0530, Soumya K S wrote: >> Hi Dario, > Hi again, > How you duin... >> we needed a deadline based scheduler for DRTL and we zeroed >> in upon SCHED_EDF. We see that the threads here are fairly new too :-P >> > Yes, we are not far from the starting point as well... :-) > >> > Nice, from here, it seemed we were working on very similar things, and I >> > was wondering if we could somehow collaborate... :-) >> > >> Sure! That would be good :) >> > So... I looked at the code and, even if the aim of our two projects are > quite the same, our implementations are very different. > > The main difference is the bandwidth reservation thing. > I strongly think that, on a system like Linux, it should be very > important to have --at least as a possibility-- the following features: > - tasks can request for a guaranteed runtime over some time interval > (bandwidth), We can specify the bandwidth reservation of an RT class and we use the reservation policy of the RT scheduling class itself. By increasing the static priority of the EDF task, we can guarantee that EDF tasks always get the required runtime. If the user puts all his EDF tasks in priority 1 , only his tasks run. In that case the entire RT bandwidth is reserved for the EDF tasks. In a way your patch also does the same thing by placing itself above the RT scheduling class. 
Only thing what we don't have in place is partitioning of RT bandwidth across RR/FIFO and EDF, which right now, we overcome by intelligently placing the tasks with different policies in different priority levels. If you are asking bandwidth reservation for guaranteeing determinism, we definitely have determinism in place, but bandwidth reservation for other real-time scheduling policies is not in place. This is something which we can surely work on. > - admission test should guarantee no oversubscription So, you are calculating the WCET online in the scheduler right? Can it calculate the amount of CPU time with the required preciseness? Here, you are increasing the enqueue time by adding an O(n) calculation for every task that you enqueue. That is the reason why for a small system, pushing this to architect made better sense in terms of decreased latencies where the turn around time from when the task enters till it gets the desired result matters, e.g., reading a sensor 2 times in 1ms. > - bandwidth enforcing must avoid reciprocal tasks interferences. > Maybe we can make the second and third configurable/optional (already > thought about that, and it should be quite easy), but they need to be > there, at least to avoid extending the interface again when we'll > realize they'll needed! :-P > > I don't know how much you, and other people here, are familiar with such > idea... We implemented it using one of the many existing algorithm. I > can give you pointers to papers and other implementations of it if > interested. Surely, we would like to look into this if you can provide some more pointers. 
> To keep it short and practical, you can think at it as something similar > to what MacOS-X (and I think Solaris) already have --and actively use, > e.g., for the Jack Audio Connection Kit-- and call > THREAD_TIME_CONSTRAINT (very poor docs, I think, but it gives the big > picture): > http://developer.apple.com/mac/library/documentation/Darwin/Conceptual/KernelProgramming/scheduler/scheduler.html#//apple_ref/doc/uid/TP30000905-CH211-BEHJDFCA > > Moreover, as Peter pointed out, coming out with an interface which is > too much tightly tied to EDF, or to any other specific algorithm, > wouldn't be the best idea. What would happen if somewhere in the future > we decide to change from the EDF algorithm to some other deadline based > one? Hmm on a lighter note, we would rather say you would just need to replace 3 functions then :)) > That's why we changed the name and the interface from _EDF/_edf (yep, it > has been our first choice too! :-P) to _DEADLINE/_deadline, and that's > why I think we should continue striving for even more > interface-algorithm independence. > True, but we really think its a matter of trade-off between how much response time you can guarantee for a real-time task v/s how much scalable you want your design to be. The deterministic response times that you might have achieved by having all these features might be good enough (Not sure of your numbers here) in a soft real time scenario, but wondering if it would meet ends otherwise. > Obviously, also SMP has to be there! :-P > We have it in fully partitioned right now, but I'll become global (with > support for cpu-affinity) in very short time I hope (Juri, from ReTiS > Lab in Pisa is already working on it). > > For rt-mutexes/deadline inheritance, the big issue is that, it works if > you only have EDF, but with a "bandwidth aware scheduler", you need > something more, which is why we don't have it yet... However, I think it > could be a nice starting point. 
Yes, we too think without a DI in place, it's not a complete real-time solution. > > Finally, I like the idea of deadline IRQ handling and I think it would > be worth to mind it some more. :-) > We found many use cases for this feature where real-time tasks have higher priority than lower priority interrupts generated as a result of say audio streaming, etc Being able to configure these was extremely important to maintain the deterministic property of an EDF task in the system. >> > Even from the implementation point of view, I see you didn't used a new >> > scheduling class. >> > >> A simple system where there a few real-time tasks and a few non-real >> time tasks, the timelines can be architected out for each real-time >> task in the system. In such a case, given the RT bandwidth in the >> system, the task with the lowest deadline gets to be scheduled first >> till it is there in the system. >> In short, for such simple systems, we shifted the burden of admission >> control to the architect and kept close to the existing code. >> > Well, I kind of agree on both, _iff_ you're target is _only_ such small > embedded systems. However, my very humble opinion is that, on Linux, you > need something more general and, since we are talking about real-time, > as much predictable and analyzable as you can get... And I think a > separate scheduling class would be better suited for this. What do you > think? > Yes, the target was industrial control systems where we needed deterministic real-time response and also the responsiveness of the task was critical. Here, the demanding real-time tasks were not too many (~4/5 at a given point in time) and also, there were other user tasks which had to update the results of this real-time task remotely. Hence, we were very wary of introducing latencies in the system. Instead, we focused on bringing in determinism into the system without increasing its latency! 
Also, the concept of a deadline miss handler was very handy, for a task missing its deadline not to interfere with the determinism of the other tasks. In this approach, we were able to meet the demanding response times with determinism in place. However, I do understand that this approach puts the system designer in a hot spot! :) >> > I'm right in the opposite situation, I've got SMP (partitioned for now, >> > but we're working on migrations) and also CGROUPS support, but we are >> > still wondering how deadline (or something more sophisticated, like >> > bandwidth) inheritance could work in such a case... >> > >> That's right, we are still working on SMP and hope there are no >> scalability issues in this patch w.r.t SMP. >> > Well, I don't know. I guess achieving something similar to what we > already have now (partitioned SMP) should not be impossible even with > your approach... But if you want something different, such has global > (EDF) scheduling, where task can migrate among CPUs according to their > affinity, would but a major headache!! :-O > >> > Do you already have any numbers or testcase? I have some (well, a few!) >> > of them... I'll try to find the time to give it a try to your patch with >> > them... >> > >> We have tested Co-operative context switch time, Pre-emptive context >> switch time and Interrupt Latency, all of them are of ~130us for >> OMAP3530. >> > Mmm... I'm not sure I see why and how your patch should affect context > switches duration... However, do you have the testcases for such tests? > Well we are actually saying that it does _not_ affect the context switch time :). We are measuring the time when a task is entered in the system till it gets scheduled both in preemptive and non-preemptive modes. This figure does not change even for a loaded system which shows the deterministic turn around time for a task in terms of scheduling latencies. Regards, Shubhro Soumya ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers 2009-10-28 14:15 ` Soumya K S @ 2009-10-28 16:24 ` Raistlin 2009-11-10 14:03 ` Soumya K S 2009-11-10 14:34 ` Peter Zijlstra 0 siblings, 2 replies; 10+ messages in thread From: Raistlin @ 2009-10-28 16:24 UTC (permalink / raw) To: Soumya K S Cc: linux-kernel, mingo, Dhaval Giani, Peter Zijlstra, Thomas Gleixner, Claudio Scordino, michael trimarchi, Juri Lelli [-- Attachment #1: Type: text/plain, Size: 9279 bytes --] On Wed, 2009-10-28 at 19:45 +0530, Soumya K S wrote: > > The main difference is the bandwidth reservation thing. > > I strongly think that, on a system like Linux, it should be very > > important to have --at least as a possibility-- the following features: > > - tasks can request for a guaranteed runtime over some time interval > > (bandwidth), > > We can specify the bandwidth reservation of an RT class and we use the > reservation policy of the RT scheduling class itself. > Yes, and all that you can specify is how much bandwidth all the EDF+FIFO tasks in the system will get. I was talking about something very different... :-( > By increasing > the static priority of the EDF task, we can guarantee that EDF tasks > always get the required runtime. > Which is not enforced to stay below any kind of value of deterministic/stochastic worst case execution time nor any kind of budget which is guaranteed to not being overrun. This means that you have no way to analyze the system and that you can make no assumptions about your tasks meeting their deadline or not, about who's going to miss it, by how far, etc. You can have an EDF task A executing for more than what you expected (if you expected something, and you _should_ expect something if you want to analyze the system at some level, don't you?) and maybe missing its deadline. 
Much more worse, you can have task A executing for more than what you expected and making task B and/or C and/or WHATEVER missing _their_ deadline, even if they "behave well"... This is far from real-time guaranteed behaviour, at least in my opinion. :-( > If the user puts all his EDF tasks in > priority 1 , only his tasks run. In that case the entire RT bandwidth > is reserved for the EDF tasks. In a way your patch also does the same > thing by placing itself above the RT scheduling class. > Agree on this, never said something different. :-) At least it is well known that deadline tasks have higher priority than FIFO/RR tasks that have higher priority than OTHER tasks. This, together with reservation based scheduling at the task (or at least task-group) level is what make the system analyzable and predictable. > Only thing what > we don't have in place is partitioning of RT bandwidth across RR/FIFO > and EDF, which right now, we overcome by intelligently placing the > tasks with different policies in different priority levels. > I'm not finding the 'intelligent placing' in the patch, so I guess this is up to the userspace. Providing the userspace with a flexible solution is something very useful... Relying on userspace to do things 'intelligently' is something I'm not sure I would do, especially in a so much general purpose OS like Linux, used in so much different contexts. But, again, that's only my opinion. :-) If I understood the code well (somme comments here and there would have helped! :-P) one (or more) EDF task(s) can starve FIFO/RR tasks, which may happen to me as well. However, it also may happen that one (or more) FIFO/RR task(s) starve EDF tasks! Thus, there always could be someone which might be starved and you can't even say who it will be... Again, this seems lack of determinism to me. 
> If you are asking bandwidth reservation for guaranteeing determinism, > we definitely have determinism in place, but bandwidth reservation for > other real-time scheduling policies is not in place. > See? World is so beautiful because there are so much different possible opinions and interpretations of the same concepts! :-D :-D > > - admission test should guarantee no oversubscription > > So, you are calculating the WCET online in the scheduler right? > No, I don't... Did you looked at the code? > Can it > calculate the amount of CPU time with the required preciseness? Here, > you are increasing the enqueue time by adding an O(n) calculation for > every task that you enqueue. > No, I don't... Did you looked at the code? :-P > That is the reason why for a small > system, pushing this to architect made better sense in terms of > decreased latencies where the turn around time from when the task > enters till it gets the desired result matters, e.g., reading a sensor > 2 times in 1ms. > Given the fact that I do not have anything in the scheduler that increase latencies and enqueue/dequeue overhead, it sure depends on you're target, as already said. You keep saying that for a small system it is up to the system architect to check if the configuration will be schedulable or not, which may be reasonable. What I'm wondering is how this poor guy might do that and hope to have this enforced by a scheduling policy which allows a task to interfere with all the other ones to the point of making them missing their deadlines... And this could happen in your code, since you only have deadline miss based checks, which may be not enough to prevent it. > > That's why we changed the name and the interface from _EDF/_edf (yep, it > > has been our first choice too! :-P) to _DEADLINE/_deadline, and that's > > why I think we should continue striving for even more > > interface-algorithm independence. 
> > > True, but we really think its a matter of trade-off between how much > response time you can guarantee for a real-time task v/s how much > scalable you want your design to be. > Well, I'm not seeing how trying to have a better interface/algorithm separation would affect the response time that much... For example, I don't expect that putting your code in a separate scheduling class would make you miss some deadline... > The deterministic response times > that you might have achieved by having all these features might be > good enough (Not sure of your numbers here) in a soft real time > scenario, but wondering if it would meet ends otherwise. > The response time I can achieve with all these features is exactly the same you can achieve with the current FIFO/RR task, which have more or less the same features. Actually, the scheduling overhead is even smaller than in rt tasks since we are still able to enforce bandwidth without the need of hierarchical scheduling and accounting... The added feature of being able to ask the scheduler that you don't want your task response time, latency and ability to meet its deadline to be affected by some other task which is running away comes with no price in terms of response time. By the way, what numbers do you miss here? Just ask and I'll do my best to provide them to you... > Yes, the target was industrial control systems where we needed > deterministic real-time response and also the responsiveness of the > task was critical. Here, the demanding real-time tasks were not too > many (~4/5 at a given point in time) and also, there were other user > tasks which had to update the results of this real-time task remotely. > Hence, we were very vary of introducing latencies in the system. > Instead, we focused on bringing in determinism into the system without > increasing its latency! > Hey, 'the system' already has a scheduling policy called SCHED_FIFO > which already have _a_lot_ of determinism... 
and EDF is **not** more deterministic than fix-priority! There are people that like more EDF than FP, there are people that like more FP than EDF, they both have advantages and drawbacks, but implementing EDF can't be claimed as 'bringing determinism'... So, now I'm curious :-D. You say you need EDF in that application scenario, which might be more than true, but the reason can't be 'lack of determinism' since FP scheduling is as much deterministic as you want/are able to configure it using the correct priorities... So what was your problem with it? > Also, the concept of a deadline miss handler > was very handy, for a task missing its deadline not to interfere with > the determinism of the other tasks. > Oh, ok. But I think we can agree that you can have a task that, as said above, not miss its own deadline --and thus you don't catch it-- but makes all the other tasks in the system to miss their own ones! How your definition of determinism applies on this situation? :-O > > Mmm... I'm not sure I see why and how your patch should affect context > > switches duration... However, do you have the testcases for such tests? > > > > Well we are actually saying that it does _not_ effect the context > switch time :). > Which was expectable... > We are measuring the time when a task is entered in the system till it > gets scheduled both in preemptive and non-preemptive modes. This > figure does not change even for a loaded system which shows the > deterministic turn around time for a task in terms of scheduling > latencies. > ... Ok, it seems I need to be more explicit here: do you have the code of the tests, so that someone else can reproduce them? 
Regards, Dario -- <<This happens because I choose it to happen!>> (Raistlin Majere) ---------------------------------------------------------------------- Dario Faggioli, ReTiS Lab, Scuola Superiore Sant'Anna, Pisa (Italy) http://blog.linux.it/raistlin / raistlin@ekiga.net / dario.faggioli@jabber.org [-- Attachment #2: This is a digitally signed message part --] [-- Type: application/pgp-signature, Size: 197 bytes --] ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers 2009-10-28 16:24 ` Raistlin @ 2009-11-10 14:03 ` Soumya K S 2009-11-10 14:34 ` Peter Zijlstra 1 sibling, 0 replies; 10+ messages in thread From: Soumya K S @ 2009-11-10 14:03 UTC (permalink / raw) To: Raistlin Cc: linux-kernel, mingo, Dhaval Giani, Peter Zijlstra, Thomas Gleixner, Claudio Scordino, michael trimarchi, Juri Lelli On Wed, Oct 28, 2009 at 9:54 PM, Raistlin <raistlin@linux.it> wrote: > On Wed, 2009-10-28 at 19:45 +0530, Soumya K S wrote: >> > The main difference is the bandwidth reservation thing. >> > I strongly think that, on a system like Linux, it should be very >> > important to have --at least as a possibility-- the following features: >> > - tasks can request for a guaranteed runtime over some time interval >> > (bandwidth), >> >> We can specify the bandwidth reservation of an RT class and we use the >> reservation policy of the RT scheduling class itself. >> > Yes, and all that you can specify is how much bandwidth all the > EDF+FIFO tasks in the system will get. I was talking about something > very different... :-( > >> By increasing >> the static priority of the EDF task, we can guarantee that EDF tasks >> always get the required runtime. >> > Which is not enforced to stay below any kind of value of > deterministic/stochastic worst case execution time nor any kind of > budget which is guaranteed to not being overrun. This means that you > have no way to analyze the system and that you can make no assumptions > about your tasks meeting their deadline or not, about who's going to > miss it, by how far, etc. We now agree that bandwidth reservation comes in very handy. But we still feel that making use of RT bandwidth itself and reserving some for EDF is sufficient. We are working on getting this up. Having said this, further, let us know if we are missing anything here or we understood it wrong. 
According to our understanding, we see that the Bandwidth reservation + 'budgeting' in your patch has the following impact on the system: 1. Admission Control made configurable 2. DOMINO effect 3. System analysis 4. Replenishment of deadlines to the task missing their deadlines. 1. Admission Control made configurable You take in something called as a "budget" and "deadline" from the user and these parameters decide the sanity of the entire real-time system. How does the user provide "budget" ? - If the budget of a task is its WCET, then admission control works just fine here. - If the budget of a task is less than its WCET, admission control does not have much meaning, right? So, what we have different in our designs w.r.t how user sees it? - We depend on user to provide meaningful deadlines. You depend on user to provide meaningful deadlines + meaningful budget. - We depend on user / system architect to decide the schedulability of a task set. You take in the numbers from the user, but calculate it yourself. OK, that way you have some configurability! 2. DOMINO effect You prevent Domino effect by checking the available bandwidth against the budget, when you see that the task will not finish in the remaining bandwidth, you push the task out. you replenish the budget and the deadline for the task for the next execution cycle. And now, I guess you are sending a signal to the user. We prevent this by sending a signal to the user and the default handler kills the task, thus not affecting the deadlines of the other tasks. 3. System Analysis You can analyse the system about your tasks meeting their deadline or not, about who's going to miss it, by how far "a priori". Also, This is fair only if your budgeting is done well. Agree, we cannot do any analysis "prior" to missing its deadline. I can't yet figure a use case where system analysis is absolutely required in a real-time scenario while execution, but while trial and error, this might definitely come in handy. 
For me, task either misses a deadline, or successfully completes in-time, probably only sufficient while in runtime. 4. Replenish deadline of the deadline missed task Hmm, can't yet understand a good use case of this. If user "wants" to do this, he can do it from the signal handler itself in our case. But we still feel doing that yourself without the user knowing it is changing the very "meaning" of a task deadline! I guess this comes in use for a periodic task?? Correct me if I am wrong, but "replenishment of a task deadline" for a non-periodic task ---- doesn't make any sense for its very meaning of a deadline is lost... If this is for a periodic task, and "postponing" a given user _deadline_ is _tolerable_ in the system, why do you need EDF for this? Won't an FP scheduler take better care in such case?? > > You can have an EDF task A executing for more than what you expected (if > you expected something, and you _should_ expect something if you want to > analyze the system at some level, don't you?) and maybe missing its > deadline. When user specifies a deadline, the task should finish within its deadline, that's his expectation!! About missing its deadline, that's where the fault handler comes into picture where user also specifies the time before deadline when the handler is fired.. 
If it does not, it depends upon the user what to do with the task like increase the deadline, or terminate it. We send a signal to the process intimating the same. And if every task behaves well and all the task parameters are "respected during actual execution" I don't see any reason why other tasks would miss their deadline if the task set is proper in the system. > >> If the user puts all his EDF tasks in >> priority 1 , only his tasks run. In that case the entire RT bandwidth >> is reserved for the EDF tasks. In a way your patch also does the same >> thing by placing itself above the RT scheduling class. >> > Agree on this, never said something different. :-) > > At least it is well known that deadline tasks have higher priority than > FIFO/RR tasks that have higher priority than OTHER tasks. This, together > with reservation based scheduling at the task (or at least task-group) > level is what make the system analyzable and predictable. > ya ya.. analysable is something we lack here !! totally agreed..... :) >> Only thing what >> we don't have in place is partitioning of RT bandwidth across RR/FIFO >> and EDF, which right now, we overcome by intelligently placing the >> tasks with different policies in different priority levels. >> > I'm not finding the 'intelligent placing' in the patch, so I guess this > is up to the userspace. Providing the userspace with a flexible solution > is something very useful... Relying on userspace to do things > 'intelligently' is something I'm not sure I would do, especially in a so > much general purpose OS like Linux, used in so much different contexts. > But, again, that's only my opinion. :-) Hmm I guess you too are "totally" dependent on user giving you the right parameters _intelligently_ (deadline / budget)... I guess we are not too different there expecting the users to be _aware_ ..! > > If I understood the code well (somme comments here and there would have > helped! 
:-P) one (or more) EDF task(s) can starve FIFO/RR tasks, which > may happen to me as well. However, it also may happen that one (or more) > FIFO/RR task(s) starve EDF tasks! > > Thus, there always could be someone which might be starved and you can't > even say who it will be... Again, this seems lack of determinism to me. Right, but now that we agree on Bandwidth reservation with RT class, this will be thankfully, resolved. > >> If you are asking bandwidth reservation for guaranteeing determinism, >> we definitely have determinism in place, but bandwidth reservation for >> other real-time scheduling policies is not in place. >> > See? World is so beautiful because there are so much different possible > opinions and interpretations of the same concepts! :-D :-D > Differences in Language and communication makes it even more beautiful!!!! ;-) >> > - admission test should guarantee no oversubscription >> >> So, you are calculating the WCET online in the scheduler right? >> > No, I don't... Did you looked at the code? > >> Can it >> calculate the amount of CPU time with the required preciseness? Here, >> you are increasing the enqueue time by adding an O(n) calculation for >> every task that you enqueue. >> > No, I don't... Did you looked at the code? :-P > >> That is the reason why for a small >> system, pushing this to architect made better sense in terms of >> decreased latencies where the turn around time from when the task >> enters till it gets the desired result matters, e.g., reading a sensor >> 2 times in 1ms. >> > Given the fact that I do not have anything in the scheduler that > increase latencies and enqueue/dequeue overhead, it sure depends on > you're target, as already said. > > You keep saying that for a small system it is up to the system architect > to check if the configuration will be schedulable or not, which may be > reasonable. 
> What I'm wondering is how this poor guy might do that and hope to have > this enforced by a scheduling policy which allows a task to interfere > with all the other ones to the point of making them missing their > deadlines... And this could happen in your code, since you only have > deadline miss based checks, which may be not enough to prevent it. > Well, this in your case depends on the sanity of the "budget" the user provides, the same way it depends on the sanity of the "deadline" the user provides. And, _NO_, a task missing its deadline _cannot_ make other tasks miss their deadline too if the deadlines given are sane. >> > That's why we changed the name and the interface from _EDF/_edf (yep, it >> > has been our first choice too! :-P) to _DEADLINE/_deadline, and that's >> > why I think we should continue striving for even more >> > interface-algorithm independence. >> > >> True, but we really think it's a matter of trade-off between how much >> response time you can guarantee for a real-time task v/s how much >> scalable you want your design to be. >> > Well, I'm not seeing how trying to have a better interface/algorithm > separation would affect the response time that much... For example, I > don't expect that putting your code in a separate scheduling class would > make you miss some deadline... > >> The deterministic response times >> that you might have achieved by having all these features might be >> good enough (Not sure of your numbers here) in a soft real time >> scenario, but wondering if it would meet ends otherwise. >> > The response time I can achieve with all these features is exactly the > same you can achieve with the current FIFO/RR task, which have more or > less the same features. Actually, the scheduling overhead is even > smaller than in rt tasks since we are still able to enforce bandwidth > without the need of hierarchical scheduling and accounting... 
> > The added feature of being able to ask the scheduler that you don't > want your task response time, latency and ability to meet its deadline > to be affected by some other task which is running away comes with no > price in terms of response time. > > By the way, what numbers do you miss here? Just ask and I'll do my best > to provide them to you... > >> Yes, the target was industrial control systems where we needed >> deterministic real-time response and also the responsiveness of the >> task was critical. Here, the demanding real-time tasks were not too >> many (~4/5 at a given point in time) and also, there were other user >> tasks which had to update the results of this real-time task remotely. >> Hence, we were very wary of introducing latencies in the system. >> Instead, we focused on bringing in determinism into the system without >> increasing its latency! >> > Hey, 'the system' already has a scheduling policy called SCHED_FIFO > which already have _a_lot_ of determinism... and EDF is **not** more > deterministic than fix-priority! There are people that like more EDF > than FP, there are people that like more FP than EDF, they both have > advantages and drawbacks, but implementing EDF can't be claimed as > 'bringing determinism'... > > So, now I'm curious :-D. > > You say you need EDF in that application scenario, which might be more > than true, but the reason can't be 'lack of determinism' since FP > scheduling is as much deterministic as you want/are able to configure it > using the correct priorities... So what was your problem with it? > >> Also, the concept of a deadline miss handler >> was very handy, for a task missing its deadline not to interfere with >> the determinism of the other tasks. >> > Oh, ok. But I think we can agree that you can have a task that, as said > above, not miss its own deadline --and thus you don't catch it-- but > makes all the other tasks in the system to miss their own ones! 
> > How your definition of determinism applies on this situation? :-O If the task deadlines are proper and "respected" during actual execution, and user supplies correct deadlines to the tasks I do not see any reason why the system is not deterministic!! > >> > Mmm... I'm not sure I see why and how your patch should affect context >> > switches duration... However, do you have the testcases for such tests? >> > >> >> Well we are actually saying that it does _not_ affect the context >> switch time :). >> > Which was expectable... > >> We are measuring the time when a task is entered in the system till it >> gets scheduled both in preemptive and non-preemptive modes. This >> figure does not change even for a loaded system which shows the >> deterministic turn around time for a task in terms of scheduling >> latencies. >> > ... Ok, it seems I need to be more explicit here: do you have the code > of the tests, so that someone else can reproduce them? > It measures the time from the dequeue of the previous task to the scheduling of the next task in the queue. Just variables to catch times in the kernel and the user app would do.. Regards, Shubhro Soumya ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers 2009-10-28 16:24 ` Raistlin 2009-11-10 14:03 ` Soumya K S @ 2009-11-10 14:34 ` Peter Zijlstra 1 sibling, 0 replies; 10+ messages in thread From: Peter Zijlstra @ 2009-11-10 14:34 UTC (permalink / raw) To: Raistlin Cc: Soumya K S, linux-kernel, mingo, Dhaval Giani, Thomas Gleixner, Claudio Scordino, michael trimarchi, Juri Lelli On Wed, 2009-10-28 at 17:24 +0100, Raistlin wrote: > Relying on userspace to do things > 'intelligently' is something I'm not sure I would do, especially in a so > much general purpose OS like Linux, used in so much different contexts. > But, again, that's only my opinion. :-) I'd put it even stronger, relying on userspace to do something intelligent is utterly stupid. We have to assume userspace is hostile and out to get you. Therefore I fully support the model that puts admission control into the kernel, because that provides isolation between, and guarantees to applications. Not providing this, and having no overload protection is one of the biggest failures of SCHED_FIFO/RR. On Tue, 2009-11-10 at 19:33 +0530, Soumya K S wrote: > Hmm I guess you too are "totally" dependent on user giving you the > right parameters _intelligently_ (deadline / budget)... I guess we are > not too different there expecting the users to be _aware_ ..! The difference is that if A messes up its own parameters only A suffers and the rest of the system continues to work as expected. With your approach anything is out the window. So yes, userspace needs to be aware, but a task can only screw itself, not everybody else, which is a very important feature. @ DRTL folks: If you want deadline based scheduling (it appears you do) I suggest you start helping out Dario, your proposal isn't going to happen. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers 2009-10-21 15:38 ` [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers Soumya K S 2009-10-22 7:54 ` Raistlin @ 2009-10-22 9:33 ` Claudio Scordino [not found] ` <e932a33e0910220738v3c612d68ve902a770ab0928f4@mail.gmail.com> 2 siblings, 0 replies; 10+ messages in thread From: Claudio Scordino @ 2009-10-22 9:33 UTC (permalink / raw) To: Soumya K S; +Cc: linux-kernel, tglx, mingo, Peter Zijlstra Soumya K S ha scritto: > Hello All, > > We would like to present a patch on Deployment specific Real-Time > Linux, topic discussed in LinuxCon 2009 > <http://events.linuxfoundation.org/lc09d17> > > The developed framework allows user to specify the real-time > strategies for a specific real-time scenario. User specifies > configurations like scheduling policy, Deadline-miss Fault-tolerance > limit, interrupt priorities, etc. Real-time applications use onetime > gateway to notify kernel that they require real-time response. All > applications use existing POSIX APIs. DRTL scheduler is time-aware and > uses EDF as the scheduling policy. > > The patch consists of Time aware scheduler having SCHED_EDF as the > scheduling policy, Deadline based scheduling for Interrupt handlers, > Deadline Inheritance support for RT-Mutexes. > > The patch is for the kernel version 2.6.32-rc3. It has been tested on > OMAP3530, ATMEL AT91SAM9261 and X86 platforms. > > We look forward for support and feedback about > DRTL <http://docs.google.com/fileview?id=0BzLQtQ1qAO7uYjYyN2QwNGUtYWE2YS00MDc1LWExYWUtZTliNjNjNmZiZTZj&hl=en> > and the patch for its feasibility, scalability and performance. 
> > Many Thanks, > Soumya KS > Shubhro Sinha Hi, FYI, a patch to add EDF scheduling to Linux has been already proposed in September (see http://lkml.org/lkml/2009/9/22/186), discussed on mailing list and presented with a talk at the last Real-Time Linux Workshop (RTLWS - http://www.osadl.org/Dresden-2009.rtlws11-dresden-2009.0.html). The patch was initially called "SCHED_EDF". Then, on suggestion of Peter, the name has been changed to "SCHED_DEADLINE" and the code moved to a public git repository (http://gitorious.org/sched_deadline/sched-deadline). Recently, the patch has been re-submitted to LKML (see http://lkml.org/lkml/2009/10/16/161) with many issues fixed. Among them: a new behavior of fork(), a new syscall for periodic tasks, flags to signal deadline misses and budget overruns. All these changes come from the comments and the feedback got on mailing list and at the workshop. At a first glance, your patch extends sched_param (which may lead to compatibility problems with old code), it does not support SMP platforms, and it changes the sched_rt code instead of adding a new scheduling class. For the reasons above, I think that we should stick with SCHED_DEADLINE, eventually integrating some parts of your code into that project. Best regards, Claudio ^ permalink raw reply [flat|nested] 10+ messages in thread
[parent not found: <e932a33e0910220738v3c612d68ve902a770ab0928f4@mail.gmail.com>]
* Re: [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers [not found] ` <e932a33e0910220738v3c612d68ve902a770ab0928f4@mail.gmail.com> @ 2009-10-22 14:40 ` Soumya K S 0 siblings, 0 replies; 10+ messages in thread From: Soumya K S @ 2009-10-22 14:40 UTC (permalink / raw) To: linux-kernel; +Cc: tglx, mingo Making the patch inline,, Thanks, Soumya diff -Naur linux-2.6.32-rc3/arch/arm/include/asm/signal.h linux-2.6.32-rc3-drtl/arch/arm/include/asm/signal.h --- linux-2.6.32-rc3/arch/arm/include/asm/signal.h 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/arch/arm/include/asm/signal.h 2009-10-20 10:40:16.000000000 +0530 @@ -70,6 +70,7 @@ #define SIGRTMIN 32 #define SIGRTMAX _NSIG +#define SIGMISSDEAD SIGRTMIN + 4 #define SIGSWI 32 /* diff -Naur linux-2.6.32-rc3/include/linux/interrupt.h linux-2.6.32-rc3-drtl/include/linux/interrupt.h --- linux-2.6.32-rc3/include/linux/interrupt.h 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/include/linux/interrupt.h 2009-10-20 11:55:45.000000000 +0530 @@ -107,6 +107,12 @@ }; extern irqreturn_t no_action(int cpl, void *dev_id); +#ifdef CONFIG_SCHED_EDF +extern int __must_check +request_irq_edf(unsigned int irq, irq_handler_t handler,irq_handler_t thread_fn, unsigned long flags, + const char *name, void *dev, struct timespec *ts); +#endif + #ifdef CONFIG_GENERIC_HARDIRQS extern int __must_check diff -Naur linux-2.6.32-rc3/include/linux/irq.h linux-2.6.32-rc3-drtl/include/linux/irq.h --- linux-2.6.32-rc3/include/linux/irq.h 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/include/linux/irq.h 2009-10-20 11:51:27.000000000 +0530 @@ -23,6 +23,7 @@ #include <linux/errno.h> #include <linux/topology.h> #include <linux/wait.h> +#include <linux/ktime.h> #include <asm/irq.h> #include <asm/ptrace.h> @@ -206,6 +207,8 @@ struct proc_dir_entry *dir; #endif const char *name; + ktime_t deadline; + unsigned edf_flag; } ____cacheline_internodealigned_in_smp; extern void 
arch_init_copy_chip_data(struct irq_desc *old_desc, diff -Naur linux-2.6.32-rc3/include/linux/plist.h linux-2.6.32-rc3-drtl/include/linux/plist.h --- linux-2.6.32-rc3/include/linux/plist.h 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/include/linux/plist.h 2009-10-20 11:00:31.000000000 +0530 @@ -87,6 +87,8 @@ struct plist_node { int prio; + long long int deadline; + int policy; struct plist_head plist; }; @@ -142,9 +144,11 @@ * @node: &struct plist_node pointer * @prio: initial node priority */ -static inline void plist_node_init(struct plist_node *node, int prio) +static inline void plist_node_init(struct plist_node *node, int prio, long long int deadline, int policy) { node->prio = prio; + node->deadline = deadline; + node->policy = policy; plist_head_init(&node->plist, NULL); } diff -Naur linux-2.6.32-rc3/include/linux/sched.h linux-2.6.32-rc3-drtl/include/linux/sched.h --- linux-2.6.32-rc3/include/linux/sched.h 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/include/linux/sched.h 2009-10-20 11:03:51.000000000 +0530 @@ -36,6 +36,7 @@ #define SCHED_FIFO 1 #define SCHED_RR 2 #define SCHED_BATCH 3 +#define SCHED_EDF 123 /* SCHED_ISO: reserved but not implemented yet */ #define SCHED_IDLE 5 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ @@ -43,9 +44,6 @@ #ifdef __KERNEL__ -struct sched_param { - int sched_priority; -}; #include <asm/param.h> /* for HZ */ @@ -102,6 +100,11 @@ struct bts_context; struct perf_event_context; +struct sched_param { + int sched_priority; + struct timespec deadline; +}; + /* * List of flags we want to share for kernel threads, * if only because they are not used by them anyway. 
@@ -195,6 +198,7 @@ #define TASK_DEAD 64 #define TASK_WAKEKILL 128 #define TASK_WAKING 256 +#define EXIT_MISS_DEADLINE 512 /* Convenience macros for the sake of set_task_state */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) @@ -1201,6 +1205,9 @@ int nr_cpus_allowed; struct sched_rt_entity *back; +#ifdef CONFIG_SCHED_EDF + struct rb_node edf_node; +#endif #ifdef CONFIG_RT_GROUP_SCHED struct sched_rt_entity *parent; /* rq on which this entity is (to be) queued: */ @@ -1232,6 +1239,11 @@ const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; +#ifdef CONFIG_SCHED_EDF + ktime_t edf_deadline; + ktime_t rt_deadline; + struct timespec orig_deadline; +#endif #ifdef CONFIG_PREEMPT_NOTIFIERS /* list of struct preempt_notifier: */ @@ -1733,6 +1745,7 @@ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ +#define PF_HARDIRQ 0x00000020 /* hardirq context */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ #define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ @@ -1932,7 +1945,7 @@ #ifdef CONFIG_RT_MUTEXES extern int rt_mutex_getprio(struct task_struct *p); -extern void rt_mutex_setprio(struct task_struct *p, int prio); +extern void rt_mutex_setprio(struct task_struct *p, int prio, ktime_t *deadline); extern void rt_mutex_adjust_pi(struct task_struct *p); #else static inline int rt_mutex_getprio(struct task_struct *p) diff -Naur linux-2.6.32-rc3/init/Kconfig linux-2.6.32-rc3-drtl/init/Kconfig --- linux-2.6.32-rc3/init/Kconfig 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/init/Kconfig 2009-10-20 10:40:16.000000000 +0530 @@ -425,6 +425,12 @@ # config HAVE_UNSTABLE_SCHED_CLOCK bool + +config SCHED_EDF + bool "EDF Scheduler Support" + default n + depends on !GROUP_SCHED + depends on !SMP config GROUP_SCHED bool 
"Group CPU scheduler" diff -Naur linux-2.6.32-rc3/init/main.c linux-2.6.32-rc3-drtl/init/main.c --- linux-2.6.32-rc3/init/main.c 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/init/main.c 2009-10-20 10:40:16.000000000 +0530 @@ -101,6 +101,10 @@ enum system_states system_state __read_mostly; EXPORT_SYMBOL(system_state); +#ifdef CONFIG_SCHED_EDF +void kthread_deadmiss(void); +#endif + /* * Boot command-line arguments */ @@ -428,6 +432,9 @@ numa_default_policy(); pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); +#ifdef CONFIG_SCHED_EDF + kernel_thread(kthread_deadmiss, NULL, CLONE_FS | CLONE_FILES); +#endif unlock_kernel(); /* diff -Naur linux-2.6.32-rc3/kernel/futex.c linux-2.6.32-rc3-drtl/kernel/futex.c --- linux-2.6.32-rc3/kernel/futex.c 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/kernel/futex.c 2009-10-20 11:12:18.000000000 +0530 @@ -1384,7 +1384,7 @@ */ prio = min(current->normal_prio, MAX_RT_PRIO); - plist_node_init(&q->list, prio); + plist_node_init(&q->list, prio, 0, 0); #ifdef CONFIG_DEBUG_PI_LIST q->list.plist.lock = &hb->lock; #endif diff -Naur linux-2.6.32-rc3/kernel/irq/manage.c linux-2.6.32-rc3-drtl/kernel/irq/manage.c --- linux-2.6.32-rc3/kernel/irq/manage.c 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/kernel/irq/manage.c 2009-10-20 10:41:06.000000000 +0530 @@ -536,7 +536,16 @@ struct irq_desc *desc = irq_to_desc(action->irq); int wake, oneshot = desc->status & IRQ_ONESHOT; - sched_setscheduler(current, SCHED_FIFO, ¶m); + current->flags |= PF_HARDIRQ; + if (desc->edf_flag) + { + param.deadline.tv_sec = desc->deadline.tv.sec; + param.deadline.tv_nsec = desc->deadline.tv.nsec; + sched_setscheduler(current, SCHED_EDF, ¶m); + } + else + sched_setscheduler(current, SCHED_FIFO, ¶m); + current->irqaction = action; while (!irq_wait_for_interrupt(action)) { @@ -1088,3 +1097,18 @@ return retval; } EXPORT_SYMBOL(request_threaded_irq); + +#ifdef 
CONFIG_SCHED_EDF +int request_irq_edf (unsigned int irq, irq_handler_t handler, irq_handler_t thread_fn, + unsigned long irqflags, const char *devname, void *dev_id, struct timespec *deadline) +{ + struct irq_desc *desc; + desc = irq_to_desc(irq); + desc->deadline.tv.sec = deadline->tv_sec; + desc->deadline.tv.nsec = deadline->tv_nsec; + desc->edf_flag = 1; + return request_threaded_irq (irq, handler,thread_fn, irqflags, devname, dev_id); +} +EXPORT_SYMBOL(request_irq_edf); +#endif + diff -Naur linux-2.6.32-rc3/kernel/kthread.c linux-2.6.32-rc3-drtl/kernel/kthread.c --- linux-2.6.32-rc3/kernel/kthread.c 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/kernel/kthread.c 2009-10-20 11:54:12.000000000 +0530 @@ -33,6 +33,14 @@ struct list_head list; }; +#ifdef CONFIG_SCHED_EDF +struct kthread_deadmiss_info { + struct completion deadmiss; + struct task_struct *k; +} kthread_dead_info; +EXPORT_SYMBOL (kthread_dead_info); +#endif + struct kthread { int should_stop; struct completion exited; @@ -247,3 +255,30 @@ return 0; } + +#ifdef CONFIG_SCHED_EDF +void dead_miss_default (void) +{ + struct task_struct *tsk = current; + set_task_comm(current, "Deadmiss Default"); + /* Try to stop the runaway thread */ + kthread_stop (kthread_dead_info.k); +} + +void kthread_deadmiss (void) +{ + struct task_struct *tsk = current; + + set_task_comm(tsk, "Deadmiss Thread"); + ignore_signals(tsk); + current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; + + while (1) + { + init_completion(&kthread_dead_info.deadmiss); + wait_for_completion (&kthread_dead_info.deadmiss); + kthread_run(dead_miss_default,NULL,"Deadmiss Default"); + + } +} +#endif diff -Naur linux-2.6.32-rc3/kernel/rtmutex.c linux-2.6.32-rc3-drtl/kernel/rtmutex.c --- linux-2.6.32-rc3/kernel/rtmutex.c 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/kernel/rtmutex.c 2009-10-20 11:13:24.000000000 +0530 @@ -112,6 +112,19 @@ task->normal_prio); } +#ifdef CONFIG_SCHED_EDF +ktime_t rt_mutex_getdeadline(struct 
task_struct *task) +{ + + if (likely(!task_has_pi_waiters(task))) { + return task->rt_deadline; + } + if (ktime_sub((ktime_t)task_top_pi_waiter(task)->pi_list_entry.deadline,task->rt_deadline).tv64 < 0 ) + return (ktime_t)task_top_pi_waiter(task)->pi_list_entry.deadline; + else return task->rt_deadline; +} +#endif + /* * Adjust the priority of a task, after its pi_waiters got modified. * @@ -122,7 +135,20 @@ int prio = rt_mutex_getprio(task); if (task->prio != prio) - rt_mutex_setprio(task, prio); + rt_mutex_setprio(task, prio, NULL); + +#ifdef CONFIG_SCHED_EDF + else { + if ((task_top_pi_waiter(task)->pi_list_entry.policy == SCHED_EDF && task->policy == SCHED_EDF) + || (!task_has_pi_waiters(task)) && task->policy == SCHED_EDF) { + ktime_t deadline = rt_mutex_getdeadline(task); + if (!ktime_equal (deadline, task->edf_deadline)) + rt_mutex_setprio(task,prio,&deadline); + return; + } + } +#endif + } /* @@ -424,8 +450,14 @@ __rt_mutex_adjust_prio(task); waiter->task = task; waiter->lock = lock; - plist_node_init(&waiter->list_entry, task->prio); - plist_node_init(&waiter->pi_list_entry, task->prio); +#ifdef CONFIG_SCHED_EDF + plist_node_init(&waiter->list_entry, current->prio,current->edf_deadline.tv64,current->policy); + plist_node_init(&waiter->pi_list_entry, current->prio,current->edf_deadline.tv64,current->policy); +#else + plist_node_init(&waiter->list_entry, task->prio, 0,0); + plist_node_init(&waiter->pi_list_entry, task->prio,0,0); +#endif + /* Get the top priority waiter on the lock */ if (rt_mutex_has_waiters(lock)) diff -Naur linux-2.6.32-rc3/kernel/sched.c linux-2.6.32-rc3-drtl/kernel/sched.c --- linux-2.6.32-rc3/kernel/sched.c 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/kernel/sched.c 2009-10-20 11:10:33.000000000 +0530 @@ -121,7 +121,7 @@ static inline int rt_policy(int policy) { - if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) + if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR || policy == SCHED_EDF)) return 1; return 
0; } @@ -451,6 +451,9 @@ struct rt_rq { struct rt_prio_array active; unsigned long rt_nr_running; +#ifdef CONFIG_SCHED_EDF + unsigned long edf_running; +#endif #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED struct { int curr; /* highest queued rt task prio */ @@ -479,6 +482,11 @@ struct task_group *tg; struct sched_rt_entity *rt_se; #endif +#ifdef CONFIG_SCHED_EDF + struct rb_root edf_root; + struct sched_rt_entity *edf_next; + +#endif }; #ifdef CONFIG_SMP @@ -1816,6 +1824,9 @@ #include "sched_stats.h" #include "sched_idletask.c" #include "sched_fair.c" +#ifdef CONFIG_SCHED_EDF +#include "sched_edf.c" +#endif #include "sched_rt.c" #ifdef CONFIG_SCHED_DEBUG # include "sched_debug.c" @@ -2560,8 +2571,12 @@ /* Want to start with kernel preemption disabled. */ task_thread_info(p)->preempt_count = 1; #endif - plist_node_init(&p->pushable_tasks, MAX_PRIO); +#ifdef CONFIG_SCHED_EDF + plist_node_init(&p->pushable_tasks, MAX_PRIO, p->edf_deadline.tv64, p->policy); +#else + plist_node_init(&p->pushable_tasks, MAX_PRIO, 0, 0); +#endif put_cpu(); } @@ -5942,7 +5957,7 @@ * * Used by the rt_mutex code to implement priority inheritance logic. 
*/ -void rt_mutex_setprio(struct task_struct *p, int prio) +void rt_mutex_setprio(struct task_struct *p, int prio, ktime_t *deadline) { unsigned long flags; int oldprio, on_rq, running; @@ -5966,7 +5981,13 @@ p->sched_class = &rt_sched_class; else p->sched_class = &fair_sched_class; - + +#ifdef CONFIG_SCHED_EDF + if (p->policy == SCHED_EDF && deadline != NULL) + { + p->edf_deadline = *deadline; + } +#endif p->prio = prio; if (running) @@ -6150,6 +6171,7 @@ break; case SCHED_FIFO: case SCHED_RR: + case SCHED_EDF: p->sched_class = &rt_sched_class; break; } @@ -6197,7 +6219,7 @@ reset_on_fork = !!(policy & SCHED_RESET_ON_FORK); policy &= ~SCHED_RESET_ON_FORK; - if (policy != SCHED_FIFO && policy != SCHED_RR && + if (policy != SCHED_FIFO && policy != SCHED_RR && policy != SCHED_EDF && policy != SCHED_NORMAL && policy != SCHED_BATCH && policy != SCHED_IDLE) return -EINVAL; @@ -6344,7 +6366,7 @@ { return __sched_setscheduler(p, policy, param, false); } - +EXPORT_SYMBOL(sched_setscheduler_nocheck); static int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) { @@ -6361,7 +6383,17 @@ retval = -ESRCH; p = find_process_by_pid(pid); if (p != NULL) + { +#ifdef CONFIG_SCHED_EDF + if (policy == SCHED_EDF) + { + p->edf_deadline = ktime_add(ktime_get(),timespec_to_ktime (lparam.deadline)); + p->orig_deadline = lparam.deadline; + p->rt_deadline = p->edf_deadline; + } +#endif retval = sched_setscheduler(p, policy, &lparam); + } rcu_read_unlock(); return retval; @@ -6767,6 +6799,7 @@ switch (policy) { case SCHED_FIFO: case SCHED_RR: + case SCHED_EDF: ret = MAX_USER_RT_PRIO-1; break; case SCHED_NORMAL: @@ -6792,6 +6825,7 @@ switch (policy) { case SCHED_FIFO: case SCHED_RR: + case SCHED_EDF: ret = 1; break; case SCHED_NORMAL: @@ -9226,7 +9260,11 @@ { struct rt_prio_array *array; int i; - +#ifdef CONFIG_SCHED_EDF + rt_rq->edf_root.rb_node = NULL; + rt_rq->edf_running = 0; + rt_rq->edf_next = NULL; +#endif array = &rt_rq->active; for (i = 0; i < MAX_RT_PRIO; 
i++) { INIT_LIST_HEAD(array->queue + i); diff -Naur linux-2.6.32-rc3/kernel/sched_edf.c linux-2.6.32-rc3-drtl/kernel/sched_edf.c --- linux-2.6.32-rc3/kernel/sched_edf.c 1970-01-01 05:30:00.000000000 +0530 +++ linux-2.6.32-rc3-drtl/kernel/sched_edf.c 2009-10-20 11:25:24.000000000 +0530 @@ -0,0 +1,116 @@ +#define check_bit(node1, node2) ((node1->rb_parent_color ^ node2->rb_parent_color)&2) +static inline void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq); +static inline void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq); +static int has_equal = 0; +static struct sched_rt_entity *leftmost = NULL; +static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se); +static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se); +static inline ktime_t edf_se_deadline (struct sched_rt_entity *rt_se) +{ + return (rt_task_of(rt_se))->edf_deadline; +} +void enqueue_task_edf(struct rq *rq, struct task_struct *p) +{ + + struct rt_rq *rt_rq = &rq->rt; + struct rb_node **link = &rt_rq->edf_root.rb_node, *parent=NULL; + struct sched_rt_entity *entry; + int leftmost_flag= 1, equal = 0; + s64 diff; + u8 last_bit = 0; + /* + * Find the right place in the rbtree: + */ + has_equal = 0; + if (p->flags & PF_HARDIRQ) + p->edf_deadline = ktime_add(ktime_get(),timespec_to_ktime (p->orig_deadline)); + while (*link) { + parent = *link; + entry = rb_entry(parent, struct sched_rt_entity, edf_node); + /* + * We dont care about collisions. Nodes with + * the same key stay together. 
+ */ + + diff = ktime_sub(p->edf_deadline,edf_se_deadline(entry)).tv64; + if (diff < 0) { + link = &parent->rb_left; + } else if (diff == 0) { + link = &parent->rb_left; + last_bit = (parent->rb_parent_color & 0x02); + equal = 1; + } + else { + link = &parent->rb_right; + leftmost_flag = 0; + } + } + rb_link_node (&p->rt.edf_node,parent,link); + rb_insert_color(&p->rt.edf_node,&rt_rq->edf_root); + if (!equal) + last_bit = (parent==NULL)?0x2:~(parent->rb_parent_color & 0x02); + p->rt.edf_node.rb_parent_color |= (last_bit & 0x02); + if (leftmost_flag) + { + leftmost = rt_rq->edf_next = &p->rt; + if (equal) { + has_equal = 1; + } + } + (rt_rq_of_se(&p->rt))->edf_running++; + inc_rt_tasks(&p->rt,rt_rq_of_se(&p->rt)); +} + +void dequeue_task_edf(struct rq *rq, struct task_struct *p) +{ + struct rb_node *next_node; + struct rb_node *prev_node; + struct rb_node *assign_node; + if (rq->rt.edf_running > 2) + { + next_node = rb_next(&leftmost->edf_node); + if (&p->rt.edf_node == next_node) + next_node = rb_next(next_node); + else if (&p->rt == leftmost) + { + leftmost = rb_entry (next_node, struct sched_rt_entity, edf_node); + next_node = rb_next(&leftmost->edf_node); + } + if (&p->rt == rq->rt.edf_next) + { + rq->rt.edf_next = rb_entry(rb_next(&(rq->rt.edf_next->edf_node)), struct sched_rt_entity, edf_node); + if (has_equal && (rq->rt.edf_next == NULL || check_bit((&(p->rt.edf_node)),(&(rq->rt.edf_next->edf_node))))) + rq->rt.edf_next = leftmost; + } + has_equal = !check_bit((&leftmost->edf_node), next_node); + } + else + { + + next_node = rb_next (&p->rt.edf_node); + prev_node = rb_prev (&p->rt.edf_node); + assign_node = (next_node == NULL) ? 
prev_node : next_node; + if (assign_node != NULL) + leftmost = rq->rt.edf_next = rb_entry(assign_node, struct sched_rt_entity, edf_node); + else + leftmost = rq->rt.edf_next = NULL; + has_equal = 0; + } + (rt_rq_of_se(&p->rt))->edf_running--; + dec_rt_tasks(&p->rt, rt_rq_of_se(&p->rt)); + rb_erase(&p->rt.edf_node, &rq->rt.edf_root); +} + +struct sched_rt_entity *pick_next_task_edf(struct rq *rq) +{ + struct sched_rt_entity *retval; + struct rb_node *next_node; + retval = rq->rt.edf_next; + if (has_equal) + { + next_node = rb_next(&retval->edf_node); + rq->rt.edf_next = (next_node == NULL || check_bit((&rq->rt.edf_next->edf_node), next_node)) ? leftmost : + rb_entry(next_node, struct sched_rt_entity, edf_node); + } + return retval; +} diff -Naur linux-2.6.32-rc3/kernel/sched_rt.c linux-2.6.32-rc3-drtl/kernel/sched_rt.c --- linux-2.6.32-rc3/kernel/sched_rt.c 2009-10-05 05:42:30.000000000 +0530 +++ linux-2.6.32-rc3-drtl/kernel/sched_rt.c 2009-10-20 10:40:16.000000000 +0530 @@ -3,6 +3,14 @@ * policies) */ +#ifdef CONFIG_SCHED_EDF +struct kthread_deadmiss_info { + struct completion deadmiss; + struct task_struct *k; +}; +extern struct kthread_deadmiss_info kthread_dead_info; +#endif + #ifdef CONFIG_RT_GROUP_SCHED #define rt_entity_is_task(rt_se) (!(rt_se)->my_q) @@ -607,6 +615,7 @@ if (unlikely((s64)delta_exec < 0)) delta_exec = 0; + schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); curr->se.sum_exec_runtime += delta_exec; @@ -614,7 +623,6 @@ curr->se.exec_start = rq->clock; cpuacct_charge(curr, delta_exec); - sched_rt_avg_update(rq, delta_exec); if (!rt_bandwidth_enabled()) @@ -885,6 +893,11 @@ if (wakeup) rt_se->timeout = 0; +#ifdef CONFIG_SCHED_EDF + if (p->policy == SCHED_EDF) + enqueue_task_edf(rq,p); + else +#endif enqueue_rt_entity(rt_se); if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) @@ -896,7 +909,12 @@ struct sched_rt_entity *rt_se = &p->rt; update_curr_rt(rq); - dequeue_rt_entity(rt_se); +#ifdef CONFIG_SCHED_EDF + if (p->policy == 
SCHED_EDF) + dequeue_task_edf(rq,p); + else +#endif + dequeue_rt_entity(rt_se); dequeue_pushable_task(rq, p); } @@ -924,6 +942,11 @@ struct sched_rt_entity *rt_se = &p->rt; struct rt_rq *rt_rq; +#ifndef CONFIG_SCHED_EDF + if (p->policy == SCHED_EDF) + return; +#endif + for_each_sched_rt_entity(rt_se) { rt_rq = rt_rq_of_se(rt_se); requeue_rt_entity(rt_rq, rt_se, head); @@ -1036,10 +1059,22 @@ int idx; idx = sched_find_first_bit(array->bitmap); +#ifdef CONFIG_SCHED_EDF + BUG_ON(!rt_rq->edf_next && idx >= MAX_RT_PRIO); +#else BUG_ON(idx >= MAX_RT_PRIO); +#endif - queue = array->queue + idx; - next = list_entry(queue->next, struct sched_rt_entity, run_list); +#ifdef CONFIG_SCHED_EDF + if (!rt_rq->edf_next || rt_se_prio(rt_rq->edf_next) > idx) { +#endif + queue = array->queue + idx; + next = list_entry(queue->next, struct sched_rt_entity, run_list); + +#ifdef CONFIG_SCHED_EDF + } + else next = pick_next_task_edf(rq); +#endif return next; } @@ -1089,11 +1124,30 @@ return p; } + static void put_prev_task_rt(struct rq *rq, struct task_struct *p) { update_curr_rt(rq); p->se.exec_start = 0; - +#ifdef CONFIG_SCHED_EDF + /* Deadline miss handler for run away tasks */ + if (p->policy == SCHED_EDF && !p->flags&PF_HARDIRQ) { + if (ktime_sub(p->edf_deadline,ktime_get()).tv64 <= 0) { + if (p->flags&PF_KTHREAD) { + dequeue_task_edf (rq,p); + kthread_dead_info.k = p; + complete (&kthread_dead_info.deadmiss); + set_task_state(p, TASK_INTERRUPTIBLE); + } + else { + sigaddset(&p->pending.signal,SIGMISSDEAD); + set_tsk_thread_flag(p, TIF_SIGPENDING); + p->exit_code = EXIT_MISS_DEADLINE; + } + } + } +#endif + /* * The previous task needs to be made eligible for pushing * if it is still active On Wed, Oct 21, 2009 at 9:08 PM, Soumya K S <ssks.mt@gmail.com> wrote: > Hello All, > > We would like to present a patch on Deployment specific Real-Time > Linux, topic discussed in LinuxCon 2009 > <http://events.linuxfoundation.org/lc09d17> > > The developed framework allows user to specify the 
real-time > strategies for a specific real-time scenario. User specifies > configurations like scheduling policy, Deadline-miss Fault-tolerance > limit, interrupt priorities, etc. Real-time applications use onetime > gateway to notify kernel that they require real-time response. All > applications use existing POSIX APIs. DRTL scheduler is time-aware and > uses EDF as the scheduling policy. > > The patch consists of Time aware scheduler having SCHED_EDF as the > scheduling policy, Deadline based scheduling for Interrupt handlers, > Deadline Inheritance support for RT-Mutexes. > > The patch is for the kernel version 2.6.32-rc3. It has been tested on > OMAP3530, ATMEL AT91SAM9261 and X86 platforms. > > We look forward for support and feedback about > DRTL <http://docs.google.com/fileview?id=0BzLQtQ1qAO7uYjYyN2QwNGUtYWE2YS00MDc1LWExYWUtZTliNjNjNmZiZTZj&hl=en> > and the patch for its feasibility, scalability and performance. > > Many Thanks, > Soumya KS > Shubhro Sinha > ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2009-11-10 14:34 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <e932a33e0910210833j1fab6129m23b55101c5978c4a@mail.gmail.com>
2009-10-21 15:38 ` [PATCH] DRTL kernel 2.6.32-rc3 : SCHED_EDF, DI RT-Mutex, Deadline Based Interrupt Handlers Soumya K S
2009-10-22 7:54 ` Raistlin
2009-10-22 14:42 ` Soumya K S
2009-10-25 8:46 ` Raistlin
2009-10-28 14:15 ` Soumya K S
2009-10-28 16:24 ` Raistlin
2009-11-10 14:03 ` Soumya K S
2009-11-10 14:34 ` Peter Zijlstra
2009-10-22 9:33 ` Claudio Scordino
[not found] ` <e932a33e0910220738v3c612d68ve902a770ab0928f4@mail.gmail.com>
2009-10-22 14:40 ` Soumya K S
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox