* [patch 1/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime. [not found] <20060602165336.147812000@localhost> @ 2006-06-02 22:23 ` Esben Nielsen 2006-06-02 22:23 ` [patch 2/5] " Esben Nielsen ` (3 subsequent siblings) 4 siblings, 0 replies; 11+ messages in thread From: Esben Nielsen @ 2006-06-02 22:23 UTC (permalink / raw) To: linux-kernel; +Cc: Ingo Molnar This patch contains a new lock type based which can be both a raw_spin_lock and a rt_lock (a rt_mutex basicly). The type can be changed _runtime_. spin_mutex_t is a direct drop-in for spin_lock_t. Under PREEMPT_RT it works as a rt_lock to begin with otherwise it works as a raw_spin_lock. The behavior can be changed with the functions void spin_mutex_to_mutex(spin_mutex_t *lock); void spin_mutex_to_spin(spin_mutex_t *lock); These functions can block because they have to be sure they are the owner of rt_lock before doing the change-over. Index: linux-2.6.16-rt25/include/linux/spin_mutex.h =================================================================== --- /dev/null +++ linux-2.6.16-rt25/include/linux/spin_mutex.h @@ -0,0 +1,116 @@ +/* + * Locks which runtime can be changed from spin lock to mutex and back again + * + * Copyright (c) 2006 Esben Nielsen + * + */ + +#ifndef __LINUX_SPIN_MUTEX_H +#define __LINUX_SPIN_MUTEX_H + +#ifndef CONFIG_SPIN_MUTEXES +#include <linux/spinlock.h> + +/* + * A spin_mutex_t is now just a spinlock_t, which can be a raw_spinlock_t or + * a rt_mutex_t. 
+ * The macros for locking picks up the right operations before picking up the + * specific spin_mutex_t operations, which will now be bad operations + */ + +typedef spinlock_t spin_mutex_t; + +#define spin_mutex_lock_init(lock,lockname,file,line) __bad_spinlock_type() +#define spin_mutex_lock(lock) __bad_spinlock_type() +#define spin_mutex_trylock(lock) __bad_spinlock_type() +#define spin_mutex_unlock(lock) __bad_spinlock_type() +#define spin_mutex_lock_bh(lock) __bad_spinlock_type() +#define spin_mutex_unlock_bh(lock) __bad_spinlock_type() +#define spin_mutex_lock_irq(lock) __bad_spinlock_type() +#define spin_mutex_unlock_irq(lock) __bad_spinlock_type() +#define spin_mutex_lock_irqsave(lock) __bad_spinlock_type() +#define spin_mutex_trylock_irqsave(lock, flags) __bad_spinlock_type() +#define spin_mutex_unlock_irqrestore(lock, flags) __bad_spinlock_type() +#define spin_mutex_unlock_no_resched(lock) __bad_spinlock_type() +#define spin_mutex_unlock_wait(lock) __bad_spinlock_type() +#define spin_mutex_is_locked(lock) __bad_spinlock_type() + +static inline void spin_mutex_to_mutex(spin_mutex_t *lock) +{ +} + +static inline void spin_mutex_to_spin(spin_mutex_t *lock) +{ +} + +static inline int spin_mutexes_can_spin(void) +{ +#ifdef CONFIG_PREEMPT_RT + return 0; +#else + return 1; +#endif +} + +#else /* CONFIG_SPIN_MUTEXES */ + + +#include <linux/rtmutex.h> + +enum spin_mutex_state { + SPIN_MUTEX_SPIN, + SPIN_MUTEX_MUTEX +}; + +struct spin_mutex { + /* The state variable is protected by the mutex or + mutex.wait_lock depending on it's value */ + enum spin_mutex_state state; + struct rt_mutex mutex; +}; + +typedef struct spin_mutex spin_mutex_t; + +#define __SPIN_MUTEX_INITIALIZER(mutexname,s) \ + { .state = (s), \ + .mutex = (struct rt_mutex)__RT_MUTEX_INITIALIZER(mutexname) } + +#define DEFINE_SPIN_MUTEX(mutexname,s) \ + spin_mutex_t mutexname = __SPIN_MUTEX_INITIALIZER(mutexname,s) + +void spin_mutex_lock_init(spin_mutex_t *lock, + const char *lockname, + const char 
*file, + int line); +/* May block, depending on state */ +void spin_mutex_lock(spin_mutex_t *lock); +int spin_mutex_trylock(spin_mutex_t *lock); +void spin_mutex_unlock(spin_mutex_t *lock); +void spin_mutex_unlock_no_resched(spin_mutex_t *lock); +void spin_mutex_unlock_wait(spin_mutex_t *lock); + +void spin_mutex_lock_irq(spin_mutex_t *lock); +int spin_mutex_trylock_irq(spin_mutex_t *lock); +void spin_mutex_unlock_irq(spin_mutex_t *lock); + +unsigned long spin_mutex_lock_bh(spin_mutex_t *lock); +int spin_mutex_trylock_bh(spin_mutex_t *lock); +void spin_mutex_unlock_bh(spin_mutex_t *lock); + +int spin_mutex_is_locked(spin_mutex_t *lock); + +unsigned long spin_mutex_lock_irqsave(spin_mutex_t *lock); +int spin_mutex_trylock_irqsave(spin_mutex_t *lock, unsigned long *flags); +void spin_mutex_unlock_irqrestore(spin_mutex_t *lock, unsigned long flags); + +static inline int spin_mutexes_can_spin(void) +{ + return 1; +} + +/* Blocks until the lock is converted */; +void spin_mutex_to_mutex(spin_mutex_t *lock); +void spin_mutex_to_spin(spin_mutex_t *lock); + +#endif /* CONFIG_SPIN_MUTEXES */ +#endif Index: linux-2.6.16-rt25/include/linux/spinlock.h =================================================================== --- linux-2.6.16-rt25.orig/include/linux/spinlock.h +++ linux-2.6.16-rt25/include/linux/spinlock.h @@ -209,6 +209,8 @@ do { \ _raw_##optype##op((type *)(lock)); \ else if (TYPE_EQUAL(lock, spinlock_t)) \ _spin##op((spinlock_t *)(lock)); \ + else if (TYPE_EQUAL(lock, spin_mutex_t)) \ + spin_mutex##op((spin_mutex_t *)(lock)); \ else __bad_spinlock_type(); \ } while (0) @@ -220,6 +222,8 @@ do { \ __ret = _raw_##optype##op((type *)(lock)); \ else if (TYPE_EQUAL(lock, spinlock_t)) \ __ret = _spin##op((spinlock_t *)(lock)); \ + else if (TYPE_EQUAL(lock, spin_mutex_t)) \ + __ret = spin_mutex##op((spin_mutex_t *)(lock)); \ else __ret = __bad_spinlock_type(); \ \ __ret; \ @@ -231,6 +235,8 @@ do { \ _raw_##optype##op((type *)(lock), flags); \ else if (TYPE_EQUAL(lock, 
spinlock_t)) \ _spin##op((spinlock_t *)(lock), flags); \ + else if (TYPE_EQUAL(lock, spin_mutex_t)) \ + spin_mutex##op((spin_mutex_t *)(lock), flags); \ else __bad_spinlock_type(); \ } while (0) @@ -242,6 +248,8 @@ do { \ __ret = _raw_##optype##op((type *)(lock), flags);\ else if (TYPE_EQUAL(lock, spinlock_t)) \ __ret = _spin##op((spinlock_t *)(lock), flags); \ + else if (TYPE_EQUAL(lock, spin_mutex_t)) \ + __ret = spin_mutex##op((spin_mutex_t *)(lock), flags);\ else __bad_spinlock_type(); \ \ __ret; \ @@ -347,6 +355,8 @@ do { \ _raw_##optype##op((type *)(lock)); \ else if (TYPE_EQUAL(lock, spinlock_t)) \ _spin##op((spinlock_t *)(lock), #lock, __FILE__, __LINE__); \ + else if (TYPE_EQUAL(lock, spin_mutex_t)) \ + spin_mutex##op((spin_mutex_t *)(lock), #lock, __FILE__, __LINE__); \ else __bad_spinlock_type(); \ } while (0) @@ -579,5 +589,7 @@ static inline int bit_spin_is_locked(int */ #define __raw_spin_can_lock(lock) (!__raw_spin_is_locked(lock)) +#include <linux/spin_mutex.h> + #endif /* __LINUX_SPINLOCK_H */ Index: linux-2.6.16-rt25/init/Kconfig =================================================================== --- linux-2.6.16-rt25.orig/init/Kconfig +++ linux-2.6.16-rt25/init/Kconfig @@ -332,6 +332,10 @@ config RT_MUTEXES boolean select PLIST +config SPIN_MUTEXES + bool "Locks which can be changed between spinlocks and mutexes runtime." 
+ select RT_MUTEXES + config FUTEX bool "Enable futex support" if EMBEDDED default y Index: linux-2.6.16-rt25/kernel/Makefile =================================================================== --- linux-2.6.16-rt25.orig/kernel/Makefile +++ linux-2.6.16-rt25/kernel/Makefile @@ -19,6 +19,7 @@ ifeq ($(CONFIG_COMPAT),y) obj-$(CONFIG_FUTEX) += futex_compat.o endif obj-$(CONFIG_RT_MUTEXES) += rtmutex.o +obj-$(CONFIG_SPIN_MUTEXES) += spin_mutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_PREEMPT_RT) += rt.o Index: linux-2.6.16-rt25/kernel/spin_mutex.c =================================================================== --- /dev/null +++ linux-2.6.16-rt25/kernel/spin_mutex.c @@ -0,0 +1,153 @@ +#include <linux/spin_mutex.h> +#include <linux/sched.h> +#include <linux/spinlock.h> + +#include "rtmutex_common.h" + +#ifdef CONFIG_DEBUG_RT_MUTEXES +# include "rtmutex-debug.h" +#else +# include "rtmutex.h" +#endif + + +void spin_mutex_lock_init(struct spin_mutex *lock, + const char *name, + const char *file, + int line) +{ +#ifdef CONFIG_PREEMPT_RT + lock->state = SPIN_MUTEX_MUTEX; +#else + lock->state = SPIN_MUTEX_SPIN; +#endif + rt_mutex_init(&lock->mutex); +} + +static inline int spin_mutex_lock_common(struct spin_mutex *lock, + unsigned long *flags, int try) +{ + retry: + switch(lock->state) { + case SPIN_MUTEX_SPIN: + if (try) { + if (!spin_trylock_irqsave(&lock->mutex.wait_lock, + *flags)) + return 0; + } + else + spin_lock_irqsave(&lock->mutex.wait_lock, *flags); + + if (unlikely(lock->state != SPIN_MUTEX_SPIN)) { + spin_unlock_irqrestore(&lock->mutex.wait_lock, *flags); + goto retry; + } + return 1; + case SPIN_MUTEX_MUTEX: + *flags = 0; + if (try) { + if (!rt_mutex_trylock(&lock->mutex)) + return 0; + } + else + rt_lock(&lock->mutex); + + if (unlikely(lock->state != SPIN_MUTEX_MUTEX)) { + rt_unlock(&lock->mutex); + goto retry; + } + return 1; + } + BUG_ON(1); +} + + +unsigned long 
spin_mutex_lock_irqsave(struct spin_mutex *lock) +{ + unsigned long flags; + spin_mutex_lock_common(lock, &flags, 0); + + return flags; +} + +void spin_mutex_lock_irq(struct spin_mutex *lock) +{ + unsigned long flags; + spin_mutex_lock_common(lock, &flags, 0); +} + +void spin_mutex_lock(struct spin_mutex *lock) +{ + unsigned long flags; + spin_mutex_lock_common(lock, &flags, 0); +} + + +int spin_mutex_trylock_irqsave(struct spin_mutex *lock, + unsigned long *flags) +{ + return spin_mutex_lock_common(lock, flags, 1); +} + +void spin_mutex_unlock_irqrestore(struct spin_mutex *lock, unsigned long flags) +{ + switch(lock->state) { + case SPIN_MUTEX_SPIN: + spin_unlock_irqrestore(&lock->mutex.wait_lock, flags); + break; + case SPIN_MUTEX_MUTEX: + rt_unlock(&lock->mutex); + break; + } +} + +void spin_mutex_unlock_irq(struct spin_mutex *lock) +{ + switch(lock->state) { + case SPIN_MUTEX_SPIN: + spin_unlock_irq(&lock->mutex.wait_lock); + break; + case SPIN_MUTEX_MUTEX: + rt_unlock(&lock->mutex); + break; + } +} + +void spin_mutex_unlock(struct spin_mutex *lock) +{ + switch(lock->state) { + case SPIN_MUTEX_SPIN: + spin_unlock(&lock->mutex.wait_lock); + break; + case SPIN_MUTEX_MUTEX: + rt_unlock(&lock->mutex); + break; + } +} + +void spin_mutex_to_mutex(struct spin_mutex *lock) +{ + unsigned long flags; + + rt_lock(&lock->mutex); + spin_lock_irqsave(&lock->mutex.wait_lock,flags); + + lock->state = SPIN_MUTEX_MUTEX; + + spin_unlock_irqrestore(&lock->mutex.wait_lock,flags); + rt_unlock(&lock->mutex); +} + +/* Blocks until converted */; +void spin_mutex_to_spin(struct spin_mutex *lock) +{ + unsigned long flags; + + rt_lock(&lock->mutex); + spin_lock_irqsave(&lock->mutex.wait_lock,flags); + + lock->state = SPIN_MUTEX_SPIN; + + spin_unlock_irqrestore(&lock->mutex.wait_lock,flags); + rt_unlock(&lock->mutex); +} Index: linux-2.6.16-rt25/kernel/rtmutex.c =================================================================== --- linux-2.6.16-rt25.orig/kernel/rtmutex.c +++ 
linux-2.6.16-rt25/kernel/rtmutex.c @@ -585,7 +585,7 @@ static void remove_waiter(struct rt_mute spin_lock(&lock->wait_lock); } -#ifdef CONFIG_PREEMPT_RT +#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_SPIN_MUTEXES) static inline void rt_lock_fastlock(struct rt_mutex *lock, -- ^ permalink raw reply [flat|nested] 11+ messages in thread
* [patch 2/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime. [not found] <20060602165336.147812000@localhost> 2006-06-02 22:23 ` [patch 1/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime Esben Nielsen @ 2006-06-02 22:23 ` Esben Nielsen 2006-06-03 20:21 ` Steven Rostedt 2006-06-02 22:23 ` [patch 3/5] " Esben Nielsen ` (2 subsequent siblings) 4 siblings, 1 reply; 11+ messages in thread From: Esben Nielsen @ 2006-06-02 22:23 UTC (permalink / raw) To: linux-kernel; +Cc: Ingo Molnar This patch makes it possible to change which context the interrupt handlers for each interrupt run in, hard-irq or threaded if CONFIG_PREEMPT_HARDIRQS is set. The interface is the file /proc/irq/<irq number>/threaded You can read it to see what the context is now or write one of irq fifo <rt priority> rr <rt priority> normal <nice value> batch <nice value> where one of the latter makes the interrupt handler threaded. A replacement for request_irq(), called request_irq2(), is added. When a driver uses this to install it's irq-handler it can also give a change_irq_context call-back. This call-back is called whenever the irq-context is changed or going to be changed. The call-back must be of the form int driver_change_context(int irq, void *dev_id, enum change_context_cmd cmd) where enum change_context_cmd { IRQ_TO_HARDIRQ, IRQ_CAN_THREAD, IRQ_TO_THREADED }; The call-back is supposed to do the following on IRQ_TO_HARDIRQ: make sure everything in the interrupt handler is non-blocking or return a non-zero error code. IRQ_CAN_THREAD: Return 0 if it is ok for the interrupt handler to be run in thread context. Return a non-zero error code otherwise. IRQ_TO_THREAD: Now the interrupt handler does run in thread context. The driver can now change it's locks to being mutexes. The return value is ignored as the driver already got a chance to protest above. 
Index: linux-2.6.16-rt23.spin_mutex/include/linux/interrupt.h =================================================================== --- linux-2.6.16-rt23.spin_mutex.orig/include/linux/interrupt.h +++ linux-2.6.16-rt23.spin_mutex/include/linux/interrupt.h @@ -34,21 +34,38 @@ typedef int irqreturn_t; #define IRQ_HANDLED (1) #define IRQ_RETVAL(x) ((x) != 0) +enum change_context_cmd { + IRQ_TO_HARDIRQ, + IRQ_CAN_THREAD, + IRQ_TO_THREADED +}; + struct irqaction { irqreturn_t (*handler)(int, void *, struct pt_regs *); unsigned long flags; cpumask_t mask; const char *name; void *dev_id; +#ifdef CONFIG_CHANGE_IRQ_CONTEXT + int (*change_context)(int, void *, + enum change_context_cmd); +#endif struct irqaction *next; int irq; - struct proc_dir_entry *dir, *threaded; + struct proc_dir_entry *dir; + struct rcu_head rcu; }; extern irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs); extern int request_irq(unsigned int, irqreturn_t (*handler)(int, void *, struct pt_regs *), unsigned long, const char *, void *); +extern int request_irq2(unsigned int irq, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, const char * devname, + void *dev_id, + int (*change_context)(int, void *, + enum change_context_cmd)); extern void free_irq(unsigned int, void *); Index: linux-2.6.16-rt23.spin_mutex/include/linux/irq.h =================================================================== --- linux-2.6.16-rt23.spin_mutex.orig/include/linux/irq.h +++ linux-2.6.16-rt23.spin_mutex/include/linux/irq.h @@ -47,6 +47,18 @@ # define SA_NODELAY 0x01000000 #endif +#ifndef SA_MUST_THREAD +# define SA_MUST_THREAD 0x02000000 +#endif + +/* Set this flag if the irq handler must thread under preempt-rt otherwise not + */ +#ifdef CONFIG_PREEMPT_RT +# define SA_MUST_THREAD_RT SA_MUST_THREAD +#else +# define SA_MUST_THREAD_RT 0 +#endif + /* * IRQ types */ @@ -147,12 +159,13 @@ struct irq_type { * @chip: low level hardware access functions - comes from type * @action: the 
irq action chain * @status: status information + * (protected by the spinlock ) * @depth: disable-depth, for nested irq_disable() calls * @irq_count: stats field to detect stalled irqs * @irqs_unhandled: stats field for spurious unhandled interrupts * @thread: Thread pointer for threaded preemptible irq handling * @wait_for_handler: Waitqueue to wait for a running preemptible handler - * @lock: locking for SMP + * @lock: lock around the action list * @move_irq: Flag need to re-target interrupt destination * * Pad this out to 32 bytes for cache and indexing reasons. Index: linux-2.6.16-rt23.spin_mutex/kernel/Kconfig.preempt =================================================================== --- linux-2.6.16-rt23.spin_mutex.orig/kernel/Kconfig.preempt +++ linux-2.6.16-rt23.spin_mutex/kernel/Kconfig.preempt @@ -119,6 +119,17 @@ config PREEMPT_HARDIRQS Say N if you are unsure. +config CHANGE_IRQ_CONTEXT + bool "Change the irq context runtime" + depends on PREEMPT_HARDIRQS && (PREEMPT_RCU || !SPIN_MUTEXES) + help + You can change whether the IRQ handler(s) for each IRQ number is + running in hardirq context or as threaded by writing to + /proc/irq/<number>/threaded + If PREEMPT_RT is selected the drivers involved must be ready for it, + though, or the write will fail. Remember to switch on SPIN_MUTEXES as + well in that case as the drivers which are ready use spin-mutexes. 
+ config SPINLOCK_BKL bool "Old-Style Big Kernel Lock" depends on (PREEMPT || SMP) && !PREEMPT_RT Index: linux-2.6.16-rt23.spin_mutex/kernel/irq/internals.h =================================================================== --- linux-2.6.16-rt23.spin_mutex.orig/kernel/irq/internals.h +++ linux-2.6.16-rt23.spin_mutex/kernel/irq/internals.h @@ -22,6 +22,10 @@ static inline void end_irq(irq_desc_t *d } #ifdef CONFIG_PROC_FS +#ifdef CONFIG_CHANGE_IRQ_CONTEXT +extern int make_irq_nodelay(int irq, struct irq_desc *desc); +extern int make_irq_threaded(int irq, struct irq_desc *desc); +#endif extern void register_irq_proc(unsigned int irq); extern void register_handler_proc(unsigned int irq, struct irqaction *action); extern void unregister_handler_proc(unsigned int irq, struct irqaction *action); Index: linux-2.6.16-rt23.spin_mutex/kernel/irq/manage.c =================================================================== --- linux-2.6.16-rt23.spin_mutex.orig/kernel/irq/manage.c +++ linux-2.6.16-rt23.spin_mutex/kernel/irq/manage.c @@ -206,6 +206,153 @@ int can_request_irq(unsigned int irq, un return !action; } + +#ifdef CONFIG_CHANGE_IRQ_CONTEXT +int make_action_nodelay(int irq, struct irqaction *act) +{ + unsigned long flags; + + if(!(act->flags & SA_MUST_THREAD)) { + return 0; + } + + if( act->change_context ) { + int ret = + act->change_context(irq, act->dev_id, IRQ_TO_HARDIRQ); + if(ret) { + printk(KERN_ERR "Failed to change irq handler " + "for dev %s on IRQ%d to hardirq " + "context (ret: %d)\n", act->name, irq, ret); + return ret; + } + spin_lock_irqsave(&irq_desc[irq].lock, flags); + act->flags &=~SA_MUST_THREAD; + act->flags |= SA_NODELAY; + spin_unlock_irqrestore(&irq_desc[irq].lock, flags); + return 0; + } + else { + printk(KERN_ERR "Irq handler " + "for dev %s on IRQ%d can not be changed" + " to hardirq context\n", act->name, irq); + return -ENOSYS; + } + +} + + +static int __make_irq_threaded(int irq, struct irq_desc *desc); + +int make_irq_nodelay(int irq, 
struct irq_desc *desc) +{ + int ret = 0; + struct irqaction *act; + unsigned long flags; + + rcu_read_lock(); + for(act = desc->action; act; act = act->next) { + WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0); + if(act->flags & SA_MUST_THREAD) { + ret = make_action_nodelay(irq, act); + if(ret) { + printk(KERN_ERR "Could not make irq %d " + "nodelay (errno %d)\n", + irq, ret); + goto failed; + } + } + } + spin_lock_irqsave(&desc->lock, flags); + desc->status |= IRQ_NODELAY; + spin_unlock_irqrestore(&desc->lock, flags); + rcu_read_unlock(); + + return 0; + failed: + __make_irq_threaded(irq, desc); + rcu_read_unlock(); + return ret; +} + +int action_may_thread(int irq, struct irqaction *act) +{ + if(!act->change_context) + return !(act->flags & SA_NODELAY); + + return act->change_context(irq, act->dev_id, IRQ_CAN_THREAD) == 0; +} + + +static int __make_irq_threaded(int irq, struct irq_desc *desc) +{ + struct irqaction *act; + + for(act = desc->action; act; act = act->next) { + WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0); + if(!action_may_thread(irq, act)) { + return -EINVAL; + } + } + + if (start_irq_thread(irq, desc)) + return -ENOMEM; + + spin_lock_irq(&desc->lock); + desc->status &= ~IRQ_NODELAY; + spin_unlock_irq(&desc->lock); + + /* We can't make irq handlers change their context before we + are sure no CPU is running them in hard irq context */ + synchronize_irq(irq); + + for(act = desc->action; act; act = act->next) { + WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0); + if(act->change_context) { + /* This callback can't fail */ + act->change_context(irq, act->dev_id, IRQ_TO_THREADED); + spin_lock_irq(&desc->lock); + act->flags &=~SA_NODELAY; + act->flags |= SA_MUST_THREAD; + spin_unlock_irq(&desc->lock); + } + } + + return 0; +} + +int make_irq_threaded(int irq, struct irq_desc *desc) +{ + int ret; + rcu_read_lock(); + ret = __make_irq_threaded(irq, desc); + rcu_read_unlock(); + return ret; +} +#else /* 
CONFIG_CHANGE_IRQ_CONTEXT */ +int make_action_nodelay(int irq, struct irqaction *act) +{ + return -EINVAL; +} +int make_irq_nodelay(int irq, struct irq_desc *desc) +{ + unsigned long flags; + struct irqaction *act; + int ret = 0; + spin_lock_irqsave(&desc->lock, flags); + for(act = desc->action; act; act = act->next) { + WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0); + if(act->flags & SA_MUST_THREAD) + ret = -EINVAL; + } + if (!ret) + desc->status |= IRQ_NODELAY; + spin_unlock_irqrestore(&desc->lock, flags); + + return ret; +} + +#endif /* !CONFIG_CHANGE_IRQ_CONTEXT */ + /* * Internal function to register an irqaction - typically used to * allocate special interrupts that are part of the architecture. @@ -239,11 +386,28 @@ int setup_irq(unsigned int irq, struct i rand_initialize_irq(irq); } + WARN_ON(((~new->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0); if (!(new->flags & SA_NODELAY)) if (start_irq_thread(irq, desc)) return -ENOMEM; + + if( (desc->status & IRQ_NODELAY) && (new->flags & SA_MUST_THREAD) ) { + int ret = make_action_nodelay(irq, new); + if(ret) + return ret; + } + + + if( (new->flags & SA_NODELAY) && !(desc->status & IRQ_NODELAY) ) { + int ret = make_irq_nodelay(irq, desc); + if(ret) + return ret; + } + + /* - * The following block of code has to be executed atomically + * The following block of code has to be executed atomically due + * to the hard irq context */ spin_lock_irqsave(&desc->lock, flags); p = &desc->action; @@ -262,7 +426,7 @@ int setup_irq(unsigned int irq, struct i shared = 1; } - *p = new; + rcu_assign_pointer(*p, new); /* * Propagate any possible SA_NODELAY flag into IRQ_NODELAY: @@ -282,13 +446,25 @@ int setup_irq(unsigned int irq, struct i new->irq = irq; register_irq_proc(irq); - new->dir = new->threaded = NULL; + new->dir = NULL; register_handler_proc(irq, new); return 0; } /** + * free_irqaction_rcu - free the actual memory block of a struct irqaction + * @rcu: Pointer to the struct rcu_head + * + * Is called after RCU synchronization + */ 
+static void free_irqaction_rcu(struct rcu_head *rcu) +{ + struct irqaction *p = container_of(rcu, struct irqaction, rcu); + kfree(p); +} + +/** * free_irq - free an interrupt * @irq: Interrupt line to free * @dev_id: Device identity to free @@ -312,6 +488,7 @@ void free_irq(unsigned int irq, void *de return; desc = irq_desc + irq; + spin_lock_irqsave(&desc->lock, flags); p = &desc->action; for (;;) { @@ -325,7 +502,7 @@ void free_irq(unsigned int irq, void *de continue; /* Found it - now remove it from the list of entries */ - *pp = action->next; + rcu_assign_pointer(*pp, action->next); /* Currently used only by UML, might disappear one day.*/ #ifdef CONFIG_IRQ_RELEASE_METHOD @@ -344,9 +521,8 @@ void free_irq(unsigned int irq, void *de spin_unlock_irqrestore(&desc->lock, flags); unregister_handler_proc(irq, action); - /* Make sure it's not being used on another CPU */ synchronize_irq(irq); - kfree(action); + call_rcu(&action->rcu, free_irqaction_rcu); return; } printk(KERN_ERR "Trying to free free IRQ%d\n", irq); @@ -358,12 +534,14 @@ void free_irq(unsigned int irq, void *de EXPORT_SYMBOL(free_irq); /** - * request_irq - allocate an interrupt line + * request_irq2 - allocate an interrupt line * @irq: Interrupt line to allocate * @handler: Function to be called when the IRQ occurs * @irqflags: Interrupt type flags * @devname: An ascii name for the claiming device * @dev_id: A cookie passed back to the handler function + * @change_context: call back for requesting a change of context + * (threaded vs hardirq) * * This call allocates interrupt resources and enables the * interrupt line and IRQ handling. 
From the point this @@ -381,18 +559,23 @@ EXPORT_SYMBOL(free_irq); * * Flags: * - * SA_SHIRQ Interrupt is shared + * SA_SHIRQ Interrupt can be shared * SA_INTERRUPT Disable local interrupts while processing * SA_SAMPLE_RANDOM The interrupt can be used for entropy + * SA_NODELAY The handler must run in hardirq context + * SA_MUSTTHREAD The handler can not run in hardirq context * */ -int request_irq(unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), - unsigned long irqflags, const char * devname, void *dev_id) +int request_irq2(unsigned int irq, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, const char * devname, void *dev_id, + int (*change_context)(int, void *, + enum change_context_cmd)) { struct irqaction * action; int retval; + /* * Sanity-check: shared interrupts must pass in a real dev-ID, * otherwise we'll have trouble later trying to figure out @@ -416,8 +599,11 @@ int request_irq(unsigned int irq, action->flags = irqflags; cpus_clear(action->mask); action->name = devname; - action->next = NULL; action->dev_id = dev_id; +#ifdef CONFIG_CHANGE_IRQ_CONTEXT + action->change_context = change_context; +#endif + action->next = NULL; select_smp_affinity(irq); @@ -427,7 +613,17 @@ int request_irq(unsigned int irq, return retval; } +EXPORT_SYMBOL(request_irq2); +int request_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, const char * devname, void *dev_id) +{ + if( !(irqflags & SA_NODELAY) ) + irqflags |= SA_MUST_THREAD_RT; + + return request_irq2(irq, handler, irqflags, devname, dev_id, NULL); +} EXPORT_SYMBOL(request_irq); /** Index: linux-2.6.16-rt23.spin_mutex/kernel/irq/proc.c =================================================================== --- linux-2.6.16-rt23.spin_mutex.orig/kernel/irq/proc.c +++ linux-2.6.16-rt23.spin_mutex/kernel/irq/proc.c @@ -11,6 +11,7 @@ #include <linux/profile.h> #include <linux/proc_fs.h> #include <linux/interrupt.h> 
+#include <linux/ctype.h> #include "internals.h" @@ -83,6 +84,138 @@ static int irq_affinity_write_proc(struc #endif + + +#ifdef CONFIG_CHANGE_IRQ_CONTEXT +/* + * The /proc/irq/<irq>/threaded values: + */ +static struct proc_dir_entry *irq_threaded_entry[NR_IRQS]; + +static const char *sched_policies[] = { + "normal", + "fifo", + "rr", + "batch" +}; + +static int irq_threaded_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + unsigned int irq = (int)(long)data; + task_t *t; + + if ( (irq_desc[irq].status & IRQ_NODELAY) + || !irq_desc[irq].thread ) { + return scnprintf(page, count, "IRQ\n"); + } + + t = irq_desc[irq].thread; + if (t->policy==SCHED_NORMAL || t->policy == SCHED_BATCH ) + return scnprintf(page, count, "%s %d\n", + sched_policies[t->policy], + task_prio(t)); + else + return scnprintf(page, count, "%s %lu\n", + sched_policies[t->policy], + t->rt_priority); +} + + +static int set_rt_prio(int irq, int policy, char *arg) +{ + char *tmp; + struct sched_param prm; + int ret; + task_t *t; + + while(isspace(*arg)) + arg++; + + if(*arg) { + prm.sched_priority = simple_strtol(arg,&tmp,10); + if (tmp == arg) + return -EINVAL; + } + else + return -EINVAL; + + ret = make_irq_threaded(irq, &irq_desc[irq]); + if(ret) + return ret; + + t = irq_desc[irq].thread; + return sched_setscheduler(t, policy, &prm); +} + +static int set_nonrt_prio(int irq, int policy, char *arg) +{ + char *tmp; + long prio; + int ret; + struct sched_param prm; + task_t *t; + + while(isspace(*arg)) + arg++; + + + if(*arg) { + prio = simple_strtol(arg,&tmp,10); + if (tmp == arg || prio < -20 || prio > 19) + return -EINVAL; + } + else + prio = 0; + + ret = make_irq_threaded(irq, &irq_desc[irq]); + if(ret) + return ret; + + t = irq_desc[irq].thread; + prm.sched_priority = 0; + ret = sched_setscheduler(t, policy, &prm); + if(ret) + return ret; + + set_user_nice(t, prio); + return 0; +} + +static int irq_threaded_write_proc(struct file *file, + const char __user 
*buffer, + unsigned long count, void *data) +{ + unsigned int irq = (int)(long)data; + int ret = 0; + char buf[20]; + + if(copy_from_user( (void*)&buf[0], (void*)buffer, + count>20 ? 20: count ) ) + return -EFAULT; + + buf[19] = 0; + + if(!strnicmp(buf,"irq",3)) + ret = make_irq_nodelay(irq, &irq_desc[irq]); + else if(!strnicmp(buf,"normal",3)) + ret = set_nonrt_prio(irq, SCHED_NORMAL, buf+6); + else if(!strnicmp(buf,"fifo",3)) + ret = set_rt_prio(irq, SCHED_FIFO, buf+4); + else if(!strnicmp(buf,"rr",2)) + ret = set_rt_prio(irq, SCHED_RR, buf+2); + else if(!strnicmp(buf,"batch",3)) + ret = set_nonrt_prio(irq, SCHED_BATCH, buf+5); + else + ret = -EINVAL; + + + if(ret) + return ret; + else + return count; +} +#endif /* CONFIG_CHANGE_IRQ_CONTEXT */ + #define MAX_NAMELEN 10 void register_irq_proc(unsigned int irq) @@ -116,58 +249,33 @@ void register_irq_proc(unsigned int irq) smp_affinity_entry[irq] = entry; } #endif + +#ifdef CONFIG_CHANGE_IRQ_CONTEXT + { + struct proc_dir_entry *entry; + + /* create /proc/irq/<irq>/threaded */ + entry = create_proc_entry("threaded", 0600, irq_dir[irq]); + + if (entry) { + entry->nlink = 1; + entry->data = (void *)(long)irq; + entry->read_proc = irq_threaded_read_proc; + entry->write_proc = irq_threaded_write_proc; + } + irq_threaded_entry[irq] = entry; + } +#endif } #undef MAX_NAMELEN void unregister_handler_proc(unsigned int irq, struct irqaction *action) { - if (action->threaded) - remove_proc_entry(action->threaded->name, action->dir); if (action->dir) remove_proc_entry(action->dir->name, irq_dir[irq]); } -#ifndef CONFIG_PREEMPT_RT - -#ifndef CONFIG_PREEMPT_RT - -static int threaded_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - return sprintf(page, "%c\n", - ((struct irqaction *)data)->flags & SA_NODELAY ? 
'0' : '1'); -} - -static int threaded_write_proc(struct file *file, const char __user *buffer, - unsigned long count, void *data) -{ - int c; - struct irqaction *action = data; - irq_desc_t *desc = irq_desc + action->irq; - - if (get_user(c, buffer)) - return -EFAULT; - if (c != '0' && c != '1') - return -EINVAL; - - spin_lock_irq(&desc->lock); - - if (c == '0') - action->flags |= SA_NODELAY; - if (c == '1') - action->flags &= ~SA_NODELAY; - recalculate_desc_flags(desc); - - spin_unlock_irq(&desc->lock); - - return 1; -} - -#endif - -#endif - #define MAX_NAMELEN 128 static int name_unique(unsigned int irq, struct irqaction *new_action) @@ -197,20 +305,6 @@ void register_handler_proc(unsigned int action->dir = proc_mkdir(name, irq_dir[irq]); if (!action->dir) return; -#ifndef CONFIG_PREEMPT_RT - { - struct proc_dir_entry *entry; - /* create /proc/irq/1234/handler/threaded */ - entry = create_proc_entry("threaded", 0600, action->dir); - if (!entry) - return; - entry->nlink = 1; - entry->data = (void *)action; - entry->read_proc = threaded_read_proc; - entry->write_proc = threaded_write_proc; - action->threaded = entry; - } -#endif } #undef MAX_NAMELEN -- ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [patch 2/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime. 2006-06-02 22:23 ` [patch 2/5] " Esben Nielsen @ 2006-06-03 20:21 ` Steven Rostedt 2006-06-04 17:33 ` Esben Nielsen 0 siblings, 1 reply; 11+ messages in thread From: Steven Rostedt @ 2006-06-03 20:21 UTC (permalink / raw) To: Esben Nielsen; +Cc: linux-kernel, Ingo Molnar Disclaimer: I haven't read all your patches yet. I'm going one at a time to comment, and then I will probably send more emails about the overall response. So now, my comments are not on the big picture, but simple atomic views. On Fri, 2006-06-02 at 23:23 +0100, Esben Nielsen wrote: > This patch makes it possible to change which context the interrupt handlers > for each interrupt run in, hard-irq or threaded if CONFIG_PREEMPT_HARDIRQS is > set. > > The interface is the file > /proc/irq/<irq number>/threaded > You can read it to see what the context is now or write one of > > irq > fifo <rt priority> > rr <rt priority> > normal <nice value> > batch <nice value> > > where one of the latter makes the interrupt handler threaded. > > A replacement for request_irq(), called request_irq2(), is added. When a driver request_irq2 ... yuck! Perhaps request_irq_convertible() or something similar? But irq2, no way! > uses this to install it's irq-handler it can also give a change_irq_context > call-back. This call-back is called whenever the irq-context is changed or > going to be changed. The call-back must be of the form > > int driver_change_context(int irq, void *dev_id, enum change_context_cmd cmd) Eeee, looks like a big change to make on drivers, and something that can keep -rt from mainline forever. But I'll see more as I read. This looks optional but still it will make things more complex. 
> > where > > enum change_context_cmd { > IRQ_TO_HARDIRQ, > IRQ_CAN_THREAD, > IRQ_TO_THREADED > }; > > The call-back is supposed to do the following on > IRQ_TO_HARDIRQ: make sure everything in the interrupt handler is non-blocking > or return a non-zero error code. > IRQ_CAN_THREAD: Return 0 if it is ok for the interrupt handler to be run in > thread context. Return a non-zero error code otherwise. > IRQ_TO_THREAD: Now the interrupt handler does run in thread context. The > driver can now change it's locks to being mutexes. The return > value is ignored as the driver already got a chance to protest > above. > > Index: linux-2.6.16-rt23.spin_mutex/include/linux/interrupt.h > =================================================================== > --- linux-2.6.16-rt23.spin_mutex.orig/include/linux/interrupt.h > +++ linux-2.6.16-rt23.spin_mutex/include/linux/interrupt.h > @@ -34,21 +34,38 @@ typedef int irqreturn_t; > #define IRQ_HANDLED (1) > #define IRQ_RETVAL(x) ((x) != 0) > > +enum change_context_cmd { > + IRQ_TO_HARDIRQ, > + IRQ_CAN_THREAD, > + IRQ_TO_THREADED > +}; > + > struct irqaction { > irqreturn_t (*handler)(int, void *, struct pt_regs *); > unsigned long flags; > cpumask_t mask; > const char *name; > void *dev_id; > +#ifdef CONFIG_CHANGE_IRQ_CONTEXT > + int (*change_context)(int, void *, > + enum change_context_cmd); > +#endif > struct irqaction *next; > int irq; > - struct proc_dir_entry *dir, *threaded; > + struct proc_dir_entry *dir; > + struct rcu_head rcu; > }; > > extern irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs); > extern int request_irq(unsigned int, > irqreturn_t (*handler)(int, void *, struct pt_regs *), > unsigned long, const char *, void *); > +extern int request_irq2(unsigned int irq, > + irqreturn_t (*handler)(int, void *, struct pt_regs *), > + unsigned long irqflags, const char * devname, > + void *dev_id, > + int (*change_context)(int, void *, > + enum change_context_cmd)); > extern void free_irq(unsigned int, void 
*); > > > Index: linux-2.6.16-rt23.spin_mutex/include/linux/irq.h > =================================================================== > --- linux-2.6.16-rt23.spin_mutex.orig/include/linux/irq.h > +++ linux-2.6.16-rt23.spin_mutex/include/linux/irq.h > @@ -47,6 +47,18 @@ > # define SA_NODELAY 0x01000000 > #endif > > +#ifndef SA_MUST_THREAD > +# define SA_MUST_THREAD 0x02000000 > +#endif > + > +/* Set this flag if the irq handler must thread under preempt-rt otherwise not > + */ > +#ifdef CONFIG_PREEMPT_RT > +# define SA_MUST_THREAD_RT SA_MUST_THREAD > +#else > +# define SA_MUST_THREAD_RT 0 > +#endif > + > /* > * IRQ types > */ > @@ -147,12 +159,13 @@ struct irq_type { > * @chip: low level hardware access functions - comes from type > * @action: the irq action chain > * @status: status information > + * (protected by the spinlock ) > * @depth: disable-depth, for nested irq_disable() calls > * @irq_count: stats field to detect stalled irqs > * @irqs_unhandled: stats field for spurious unhandled interrupts > * @thread: Thread pointer for threaded preemptible irq handling > * @wait_for_handler: Waitqueue to wait for a running preemptible handler > - * @lock: locking for SMP > + * @lock: lock around the action list > * @move_irq: Flag need to re-target interrupt destination > * > * Pad this out to 32 bytes for cache and indexing reasons. > Index: linux-2.6.16-rt23.spin_mutex/kernel/Kconfig.preempt > =================================================================== > --- linux-2.6.16-rt23.spin_mutex.orig/kernel/Kconfig.preempt > +++ linux-2.6.16-rt23.spin_mutex/kernel/Kconfig.preempt > @@ -119,6 +119,17 @@ config PREEMPT_HARDIRQS > > Say N if you are unsure. 
> > +config CHANGE_IRQ_CONTEXT > + bool "Change the irq context runtime" > + depends on PREEMPT_HARDIRQS && (PREEMPT_RCU || !SPIN_MUTEXES) > + help > + You can change wether the IRQ handler(s) for each IRQ number is > + running in hardirq context or as threaded by writing to > + /proc/irq/<number>/threaded > + If PREEMPT_RT is selected the drivers involved must be ready for it, > + though, or the write will fail. Remember to switch on SPIN_MUTEXES as > + well in that case as the drivers which are ready uses spin-mutexes. > + > config SPINLOCK_BKL > bool "Old-Style Big Kernel Lock" > depends on (PREEMPT || SMP) && !PREEMPT_RT > Index: linux-2.6.16-rt23.spin_mutex/kernel/irq/internals.h > =================================================================== > --- linux-2.6.16-rt23.spin_mutex.orig/kernel/irq/internals.h > +++ linux-2.6.16-rt23.spin_mutex/kernel/irq/internals.h > @@ -22,6 +22,10 @@ static inline void end_irq(irq_desc_t *d > } > > #ifdef CONFIG_PROC_FS > +#ifdef CONFIG_CHANGE_IRQ_CONTEXT > +extern int make_irq_nodelay(int irq, struct irq_desc *desc); > +extern int make_irq_threaded(int irq, struct irq_desc *desc); > +#endif > extern void register_irq_proc(unsigned int irq); > extern void register_handler_proc(unsigned int irq, struct irqaction *action); > extern void unregister_handler_proc(unsigned int irq, struct irqaction *action); > Index: linux-2.6.16-rt23.spin_mutex/kernel/irq/manage.c > =================================================================== > --- linux-2.6.16-rt23.spin_mutex.orig/kernel/irq/manage.c > +++ linux-2.6.16-rt23.spin_mutex/kernel/irq/manage.c > @@ -206,6 +206,153 @@ int can_request_irq(unsigned int irq, un > return !action; > } > > + > +#ifdef CONFIG_CHANGE_IRQ_CONTEXT > +int make_action_nodelay(int irq, struct irqaction *act) > +{ > + unsigned long flags; > + > + if(!(act->flags & SA_MUST_THREAD)) { > + return 0; > + } Remove the brackets. Also, let me get this straight. 
If the action does not have SA_MUST_THREAD, we return? Or does it mean that if SA_MUST_THREAD is not set, then SA_NODELAY must already be set? If that is the case, then why are we not checking for SA_NODELAY here? > + > + if( act->change_context ) { > + int ret = > + act->change_context(irq, act->dev_id, IRQ_TO_HARDIRQ); > + if(ret) { > + printk(KERN_ERR "Failed to change irq handler " > + "for dev %s on IRQ%d to hardirq " > + "context (ret: %d)\n", act->name, irq, ret); > + return ret; > + } > + spin_lock_irqsave(&irq_desc[irq].lock, flags); > + act->flags &=~SA_MUST_THREAD; Both flags are zero here (see below about the WARN_ON) > + act->flags |= SA_NODELAY; > + spin_unlock_irqrestore(&irq_desc[irq].lock, flags); > + return 0; > + } > + else { > + printk(KERN_ERR "Irq handler " > + "for dev %s on IRQ%d can not be changed" > + " to hardirq context\n", act->name, irq); > + return -ENOSYS; > + } > + > +} > + > + > +static int __make_irq_threaded(int irq, struct irq_desc *desc); > + > +int make_irq_nodelay(int irq, struct irq_desc *desc) > +{ > + int ret = 0; > + struct irqaction *act; > + unsigned long flags; > + > + rcu_read_lock(); > + for(act = desc->action; act; act = act->next) { > + WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0); This WARN_ON is not protected by the descriptor lock, so if this function is run on two CPUs at the same time, then the act->flags can have both of these zero by the above code. > + if(act->flags & SA_MUST_THREAD) { > + ret = make_action_nodelay(irq, act); > + if(ret) { > + printk(KERN_ERR "Could not make irq %d " > + "nodelay (errno %d)\n", > + irq, ret); We printk on failure from above, and then printk again here? 
> + goto failed; > + } > + } > + } > + spin_lock_irqsave(&desc->lock, flags); > + desc->status |= IRQ_NODELAY; > + spin_unlock_irqrestore(&desc->lock, flags); > + rcu_read_unlock(); > + > + return 0; > + failed: > + __make_irq_threaded(irq, desc); > + rcu_read_unlock(); > + return ret; > +} > + > +int action_may_thread(int irq, struct irqaction *act) > +{ > + if(!act->change_context) > + return !(act->flags & SA_NODELAY); > + > + return act->change_context(irq, act->dev_id, IRQ_CAN_THREAD) == 0; This IRQ_CAN_THREAD just looks out of place of the other two. It feels very "hacky" to check if it can thread. But what do I know? > +} > + > + > +static int __make_irq_threaded(int irq, struct irq_desc *desc) > +{ > + struct irqaction *act; > + > + for(act = desc->action; act; act = act->next) { > + WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0); > + if(!action_may_thread(irq, act)) { > + return -EINVAL; > + } > + } > + > + if (start_irq_thread(irq, desc)) > + return -ENOMEM; I know that currently start_irq_thread can only return -ENOMEM on failure, but it may be more robust to capture the return code and return that anyway. > + > + spin_lock_irq(&desc->lock); > + desc->status &= ~IRQ_NODELAY; > + spin_unlock_irq(&desc->lock); > + > + /* We can't make irq handlers change their context before we > + are sure no CPU is running them in hard irq context */ > + synchronize_irq(irq); > + > + for(act = desc->action; act; act = act->next) { > + WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0); > + if(act->change_context) { > + /* This callback can't fail */ Will all device drivers know that the callback can't fail? > + act->change_context(irq, act->dev_id, IRQ_TO_THREADED); > + spin_lock_irq(&desc->lock); > + act->flags &=~SA_NODELAY; > + act->flags |= SA_MUST_THREAD; > + spin_unlock_irq(&desc->lock); > + } > + } > + > + return 0; > +} [snipped the rest] -- Steve ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [patch 2/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime. 2006-06-03 20:21 ` Steven Rostedt @ 2006-06-04 17:33 ` Esben Nielsen 0 siblings, 0 replies; 11+ messages in thread From: Esben Nielsen @ 2006-06-04 17:33 UTC (permalink / raw) To: rostedt; +Cc: Esben Nielsen, linux-kernel, Ingo Molnar On Sat, 3 Jun 2006, Steven Rostedt wrote: > Disclaimer: I haven't read all your patches yet. I'm going one at a > time to comment, and then I will probably send more emails about the > overall response. So now, my comments are not on the big picture, but > simple atomic views. > > On Fri, 2006-06-02 at 23:23 +0100, Esben Nielsen wrote: >> This patch makes it possible to change which context the interrupt handlers >> for each interrupt run in, hard-irq or threaded if CONFIG_PREEMPT_HARDIRQS is >> set. >> >> The interface is the file >> /proc/irq/<irq number>/threaded >> You can read it to see what the context is now or write one of >> >> irq >> fifo <rt priority> >> rr <rt priority> >> normal <nice value> >> batch <nice value> >> >> where one of the latter makes the interrupt handler threaded. >> >> A replacement for request_irq(), called request_irq2(), is added. When a driver > > request_irq2 ... yuck! Perhaps request_irq_convertible() or something > similar? But irq2, no way! I know... The problem is: 1) Don't change existing drivers. 2) The change_irq_context() call-back must be set at request_irq(), so it it is not enough to just make a new function where the driver can set it's the callback after the request_irq(). request_irq_convertible() might be an ok name. > >> uses this to install it's irq-handler it can also give a change_irq_context >> call-back. This call-back is called whenever the irq-context is changed or >> going to be changed. 
The call-back must be of the form >> >> int driver_change_context(int irq, void *dev_id, enum change_context_cmd cmd) > > Eeee, looks like a big change to make on drivers, and something that can > keep -rt from mainline forever. But I'll see more as I read. This > looks optional but still it will make things more complex. It is only optional, but the function is very easy to make. > >> >> where >> >> enum change_context_cmd { >> IRQ_TO_HARDIRQ, >> IRQ_CAN_THREAD, >> IRQ_TO_THREADED >> }; >> >> The call-back is supposed to do the following on >> IRQ_TO_HARDIRQ: make sure everything in the interrupt handler is non-blocking >> or return a non-zero error code. >> IRQ_CAN_THREAD: Return 0 if it is ok for the interrupt handler to be run in >> thread context. Return a non-zero error code otherwise. >> IRQ_TO_THREAD: Now the interrupt handler does run in thread context. The >> driver can now change it's locks to being mutexes. The return >> value is ignored as the driver already got a chance to protest >> above. 
>> >> Index: linux-2.6.16-rt23.spin_mutex/include/linux/interrupt.h >> =================================================================== >> --- linux-2.6.16-rt23.spin_mutex.orig/include/linux/interrupt.h >> +++ linux-2.6.16-rt23.spin_mutex/include/linux/interrupt.h >> @@ -34,21 +34,38 @@ typedef int irqreturn_t; >> #define IRQ_HANDLED (1) >> #define IRQ_RETVAL(x) ((x) != 0) >> >> +enum change_context_cmd { >> + IRQ_TO_HARDIRQ, >> + IRQ_CAN_THREAD, >> + IRQ_TO_THREADED >> +}; >> + >> struct irqaction { >> irqreturn_t (*handler)(int, void *, struct pt_regs *); >> unsigned long flags; >> cpumask_t mask; >> const char *name; >> void *dev_id; >> +#ifdef CONFIG_CHANGE_IRQ_CONTEXT >> + int (*change_context)(int, void *, >> + enum change_context_cmd); >> +#endif >> struct irqaction *next; >> int irq; >> - struct proc_dir_entry *dir, *threaded; >> + struct proc_dir_entry *dir; >> + struct rcu_head rcu; >> }; >> >> extern irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs); >> extern int request_irq(unsigned int, >> irqreturn_t (*handler)(int, void *, struct pt_regs *), >> unsigned long, const char *, void *); >> +extern int request_irq2(unsigned int irq, >> + irqreturn_t (*handler)(int, void *, struct pt_regs *), >> + unsigned long irqflags, const char * devname, >> + void *dev_id, >> + int (*change_context)(int, void *, >> + enum change_context_cmd)); >> extern void free_irq(unsigned int, void *); >> >> >> Index: linux-2.6.16-rt23.spin_mutex/include/linux/irq.h >> =================================================================== >> --- linux-2.6.16-rt23.spin_mutex.orig/include/linux/irq.h >> +++ linux-2.6.16-rt23.spin_mutex/include/linux/irq.h >> @@ -47,6 +47,18 @@ >> # define SA_NODELAY 0x01000000 >> #endif >> >> +#ifndef SA_MUST_THREAD >> +# define SA_MUST_THREAD 0x02000000 >> +#endif >> + >> +/* Set this flag if the irq handler must thread under preempt-rt otherwise not >> + */ >> +#ifdef CONFIG_PREEMPT_RT >> +# define SA_MUST_THREAD_RT SA_MUST_THREAD 
>> +#else >> +# define SA_MUST_THREAD_RT 0 >> +#endif >> + >> /* >> * IRQ types >> */ >> @@ -147,12 +159,13 @@ struct irq_type { >> * @chip: low level hardware access functions - comes from type >> * @action: the irq action chain >> * @status: status information >> + * (protected by the spinlock ) >> * @depth: disable-depth, for nested irq_disable() calls >> * @irq_count: stats field to detect stalled irqs >> * @irqs_unhandled: stats field for spurious unhandled interrupts >> * @thread: Thread pointer for threaded preemptible irq handling >> * @wait_for_handler: Waitqueue to wait for a running preemptible handler >> - * @lock: locking for SMP >> + * @lock: lock around the action list >> * @move_irq: Flag need to re-target interrupt destination >> * >> * Pad this out to 32 bytes for cache and indexing reasons. >> Index: linux-2.6.16-rt23.spin_mutex/kernel/Kconfig.preempt >> =================================================================== >> --- linux-2.6.16-rt23.spin_mutex.orig/kernel/Kconfig.preempt >> +++ linux-2.6.16-rt23.spin_mutex/kernel/Kconfig.preempt >> @@ -119,6 +119,17 @@ config PREEMPT_HARDIRQS >> >> Say N if you are unsure. >> >> +config CHANGE_IRQ_CONTEXT >> + bool "Change the irq context runtime" >> + depends on PREEMPT_HARDIRQS && (PREEMPT_RCU || !SPIN_MUTEXES) >> + help >> + You can change wether the IRQ handler(s) for each IRQ number is >> + running in hardirq context or as threaded by writing to >> + /proc/irq/<number>/threaded >> + If PREEMPT_RT is selected the drivers involved must be ready for it, >> + though, or the write will fail. Remember to switch on SPIN_MUTEXES as >> + well in that case as the drivers which are ready uses spin-mutexes. 
>> + >> config SPINLOCK_BKL >> bool "Old-Style Big Kernel Lock" >> depends on (PREEMPT || SMP) && !PREEMPT_RT >> Index: linux-2.6.16-rt23.spin_mutex/kernel/irq/internals.h >> =================================================================== >> --- linux-2.6.16-rt23.spin_mutex.orig/kernel/irq/internals.h >> +++ linux-2.6.16-rt23.spin_mutex/kernel/irq/internals.h >> @@ -22,6 +22,10 @@ static inline void end_irq(irq_desc_t *d >> } >> >> #ifdef CONFIG_PROC_FS >> +#ifdef CONFIG_CHANGE_IRQ_CONTEXT >> +extern int make_irq_nodelay(int irq, struct irq_desc *desc); >> +extern int make_irq_threaded(int irq, struct irq_desc *desc); >> +#endif >> extern void register_irq_proc(unsigned int irq); >> extern void register_handler_proc(unsigned int irq, struct irqaction *action); >> extern void unregister_handler_proc(unsigned int irq, struct irqaction *action); >> Index: linux-2.6.16-rt23.spin_mutex/kernel/irq/manage.c >> =================================================================== >> --- linux-2.6.16-rt23.spin_mutex.orig/kernel/irq/manage.c >> +++ linux-2.6.16-rt23.spin_mutex/kernel/irq/manage.c >> @@ -206,6 +206,153 @@ int can_request_irq(unsigned int irq, un >> return !action; >> } >> >> + >> +#ifdef CONFIG_CHANGE_IRQ_CONTEXT >> +int make_action_nodelay(int irq, struct irqaction *act) >> +{ >> + unsigned long flags; >> + >> + if(!(act->flags & SA_MUST_THREAD)) { >> + return 0; >> + } > > Remove the brackets. > > Also, let me get this straight. If the action does not have > SA_MUST_THREAD, we return? Or does it mean that if SA_MUST_THREAD is > not set, then SA_NODELAY must already be set? If that is the case, then > why are we not checking for SA_NODELAY here? If SA_MUST_THREAD is not set, there is no blocking inside the action handler and therefore it is ok to do this in hardirq aka "nodelay". Notice SA_MUST_THREAD is set on all handlers in without SA_NODELAY under PREEMPT_RT. 
> >> + >> + if( act->change_context ) { >> + int ret = >> + act->change_context(irq, act->dev_id, IRQ_TO_HARDIRQ); >> + if(ret) { >> + printk(KERN_ERR "Failed to change irq handler " >> + "for dev %s on IRQ%d to hardirq " >> + "context (ret: %d)\n", act->name, irq, ret); >> + return ret; >> + } >> + spin_lock_irqsave(&irq_desc[irq].lock, flags); >> + act->flags &=~SA_MUST_THREAD; > > Both flags are zero here (see below about the WARN_ON) > The WARN_ON is a warning on both flags set (or supposed to be...:-) It doesn't make sense to have SA_MUST_THREAD and SA_NODELAY both set. >> + act->flags |= SA_NODELAY; >> + spin_unlock_irqrestore(&irq_desc[irq].lock, flags); >> + return 0; >> + } >> + else { >> + printk(KERN_ERR "Irq handler " >> + "for dev %s on IRQ%d can not be changed" >> + " to hardirq context\n", act->name, irq); >> + return -ENOSYS; >> + } >> + >> +} >> + >> + >> +static int __make_irq_threaded(int irq, struct irq_desc *desc); >> + >> +int make_irq_nodelay(int irq, struct irq_desc *desc) >> +{ >> + int ret = 0; >> + struct irqaction *act; >> + unsigned long flags; >> + >> + rcu_read_lock(); >> + for(act = desc->action; act; act = act->next) { >> + WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0); > > This WARN_ON is not protected by the descriptor lock, so if this > function is run on two CPUs at the same time, then the act->flags can > have both of these zero by the above code. Both be set :-) But bug noticed. > >> + if(act->flags & SA_MUST_THREAD) { >> + ret = make_action_nodelay(irq, act); >> + if(ret) { >> + printk(KERN_ERR "Could not make irq %d " >> + "nodelay (errno %d)\n", >> + irq, ret); > > We printk on failure from above, and then printk again here? Well might be too verbose. 
> >> + goto failed; >> + } >> + } >> + } >> + spin_lock_irqsave(&desc->lock, flags); >> + desc->status |= IRQ_NODELAY; >> + spin_unlock_irqrestore(&desc->lock, flags); >> + rcu_read_unlock(); >> + >> + return 0; >> + failed: >> + __make_irq_threaded(irq, desc); >> + rcu_read_unlock(); >> + return ret; >> +} >> + >> +int action_may_thread(int irq, struct irqaction *act) >> +{ >> + if(!act->change_context) >> + return !(act->flags & SA_NODELAY); >> + >> + return act->change_context(irq, act->dev_id, IRQ_CAN_THREAD) == 0; > > This IRQ_CAN_THREAD just looks out of place of the other two. It feels > very "hacky" to check if it can thread. But what do I know? The problem is that the step to make a handler be threaded has to be split in two: First ask, then remove IRQ_NODELAY, then change the locks to mutexes. So the driver needs to be involved twice. I could skip this, but I put it in for generality. > >> +} >> + >> + >> +static int __make_irq_threaded(int irq, struct irq_desc *desc) >> +{ >> + struct irqaction *act; >> + >> + for(act = desc->action; act; act = act->next) { >> + WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0); >> + if(!action_may_thread(irq, act)) { >> + return -EINVAL; >> + } >> + } >> + >> + if (start_irq_thread(irq, desc)) >> + return -ENOMEM; > > I know that currently start_irq_thread can only return -ENOMEM on > failure, but it may be more robust to capture the return code and return > that anyway. *nod* This was copied from the original code, so the same thing is elsewhere, too. 
> >> + >> + spin_lock_irq(&desc->lock); >> + desc->status &= ~IRQ_NODELAY; >> + spin_unlock_irq(&desc->lock); >> + >> + /* We can't make irq handlers change their context before we >> + are sure no CPU is running them in hard irq context */ >> + synchronize_irq(irq); >> + >> + for(act = desc->action; act; act = act->next) { >> + WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0); >> + if(act->change_context) { >> + /* This callback can't fail */ > > Will all device drivers know that the callback can't fail? Well, it is in the documentation. > >> + act->change_context(irq, act->dev_id, IRQ_TO_THREADED); >> + spin_lock_irq(&desc->lock); >> + act->flags &=~SA_NODELAY; >> + act->flags |= SA_MUST_THREAD; >> + spin_unlock_irq(&desc->lock); >> + } >> + } >> + >> + return 0; >> +} > > [snipped the rest] > > -- Steve > > ^ permalink raw reply [flat|nested] 11+ messages in thread
* [patch 3/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime. [not found] <20060602165336.147812000@localhost> 2006-06-02 22:23 ` [patch 1/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime Esben Nielsen 2006-06-02 22:23 ` [patch 2/5] " Esben Nielsen @ 2006-06-02 22:23 ` Esben Nielsen 2006-06-03 20:39 ` Steven Rostedt 2006-06-02 22:23 ` [patch 4/5] " Esben Nielsen 2006-06-02 22:23 ` [patch 5/5] " Esben Nielsen 4 siblings, 1 reply; 11+ messages in thread From: Esben Nielsen @ 2006-06-02 22:23 UTC (permalink / raw) To: linux-kernel; +Cc: Ingo Molnar Makes it possible for the e100 ethernet driver to have it's interrupt handler in both hard-irq and threaded context under PREEMPT_RT. Index: linux-2.6.16-rt23.spin_mutex/drivers/net/e100.c =================================================================== --- linux-2.6.16-rt23.spin_mutex.orig/drivers/net/e100.c +++ linux-2.6.16-rt23.spin_mutex/drivers/net/e100.c @@ -530,7 +530,7 @@ struct nic { enum ru_state ru_running; spinlock_t cb_lock ____cacheline_aligned; - spinlock_t cmd_lock; + spin_mutex_t cmd_lock; struct csr __iomem *csr; enum scb_cmd_lo cuc_cmd; unsigned int cbs_avail; @@ -1950,6 +1950,30 @@ static int e100_rx_alloc_list(struct nic return 0; } +static int e100_change_context(int irq, void *dev_id, + enum change_context_cmd cmd) +{ + struct net_device *netdev = dev_id; + struct nic *nic = netdev_priv(netdev); + + switch(cmd) + { + case IRQ_TO_HARDIRQ: + if(!spin_mutexes_can_spin()) + return -ENOSYS; + + spin_mutex_to_spin(&nic->cmd_lock); + break; + case IRQ_CAN_THREAD: + /* Ok - return 0 */ + break; + case IRQ_TO_THREADED: + spin_mutex_to_mutex(&nic->cmd_lock); + break; + } + return 0; /* Ok */ +} + static irqreturn_t e100_intr(int irq, void *dev_id, struct pt_regs *regs) { struct net_device *netdev = dev_id; @@ -2064,9 +2088,12 @@ static int e100_up(struct nic *nic) e100_set_multicast_list(nic->netdev); 
e100_start_receiver(nic, NULL); mod_timer(&nic->watchdog, jiffies); - if((err = request_irq(nic->pdev->irq, e100_intr, SA_SHIRQ, - nic->netdev->name, nic->netdev))) + if((err = request_irq2(nic->pdev->irq, e100_intr, + SA_SHIRQ|SA_MUST_THREAD_RT, + nic->netdev->name, nic->netdev, + e100_change_context))) goto err_no_irq; + netif_wake_queue(nic->netdev); netif_poll_enable(nic->netdev); /* enable ints _after_ enabling poll, preventing a race between -- ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [patch 3/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime. 2006-06-02 22:23 ` [patch 3/5] " Esben Nielsen @ 2006-06-03 20:39 ` Steven Rostedt 2006-06-04 17:34 ` Esben Nielsen 0 siblings, 1 reply; 11+ messages in thread From: Steven Rostedt @ 2006-06-03 20:39 UTC (permalink / raw) To: Esben Nielsen; +Cc: linux-kernel, Ingo Molnar On Fri, 2006-06-02 at 23:23 +0100, Esben Nielsen wrote: > Makes it possible for the e100 ethernet driver to have it's interrupt handler > in both hard-irq and threaded context under PREEMPT_RT. > > Index: linux-2.6.16-rt23.spin_mutex/drivers/net/e100.c > =================================================================== > --- linux-2.6.16-rt23.spin_mutex.orig/drivers/net/e100.c > +++ linux-2.6.16-rt23.spin_mutex/drivers/net/e100.c > @@ -530,7 +530,7 @@ struct nic { > enum ru_state ru_running; > > spinlock_t cb_lock ____cacheline_aligned; > - spinlock_t cmd_lock; > + spin_mutex_t cmd_lock; > struct csr __iomem *csr; > enum scb_cmd_lo cuc_cmd; > unsigned int cbs_avail; > @@ -1950,6 +1950,30 @@ static int e100_rx_alloc_list(struct nic > return 0; > } > > +static int e100_change_context(int irq, void *dev_id, > + enum change_context_cmd cmd) > +{ > + struct net_device *netdev = dev_id; > + struct nic *nic = netdev_priv(netdev); > + > + switch(cmd) > + { > + case IRQ_TO_HARDIRQ: > + if(!spin_mutexes_can_spin()) > + return -ENOSYS; > + > + spin_mutex_to_spin(&nic->cmd_lock); > + break; > + case IRQ_CAN_THREAD: > + /* Ok - return 0 */ > + break; Why even bother with the IRQ_CAN_THREAD. If this would be anything other than OK, then we shouldn't be using that request_irq2 (yuck!) call in the first place. 
-- Steve > + case IRQ_TO_THREADED: > + spin_mutex_to_mutex(&nic->cmd_lock); > + break; > + } > + return 0; /* Ok */ > +} > + > static irqreturn_t e100_intr(int irq, void *dev_id, struct pt_regs *regs) > { > struct net_device *netdev = dev_id; > @@ -2064,9 +2088,12 @@ static int e100_up(struct nic *nic) > e100_set_multicast_list(nic->netdev); > e100_start_receiver(nic, NULL); > mod_timer(&nic->watchdog, jiffies); > - if((err = request_irq(nic->pdev->irq, e100_intr, SA_SHIRQ, > - nic->netdev->name, nic->netdev))) > + if((err = request_irq2(nic->pdev->irq, e100_intr, > + SA_SHIRQ|SA_MUST_THREAD_RT, > + nic->netdev->name, nic->netdev, > + e100_change_context))) > goto err_no_irq; > + > netif_wake_queue(nic->netdev); > netif_poll_enable(nic->netdev); > /* enable ints _after_ enabling poll, preventing a race between > ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [patch 3/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime. 2006-06-03 20:39 ` Steven Rostedt @ 2006-06-04 17:34 ` Esben Nielsen 0 siblings, 0 replies; 11+ messages in thread From: Esben Nielsen @ 2006-06-04 17:34 UTC (permalink / raw) To: Steven Rostedt; +Cc: Esben Nielsen, linux-kernel, Ingo Molnar On Sat, 3 Jun 2006, Steven Rostedt wrote: > On Fri, 2006-06-02 at 23:23 +0100, Esben Nielsen wrote: >> Makes it possible for the e100 ethernet driver to have it's interrupt handler >> in both hard-irq and threaded context under PREEMPT_RT. >> >> Index: linux-2.6.16-rt23.spin_mutex/drivers/net/e100.c >> =================================================================== >> --- linux-2.6.16-rt23.spin_mutex.orig/drivers/net/e100.c >> +++ linux-2.6.16-rt23.spin_mutex/drivers/net/e100.c >> @@ -530,7 +530,7 @@ struct nic { >> enum ru_state ru_running; >> >> spinlock_t cb_lock ____cacheline_aligned; >> - spinlock_t cmd_lock; >> + spin_mutex_t cmd_lock; >> struct csr __iomem *csr; >> enum scb_cmd_lo cuc_cmd; >> unsigned int cbs_avail; >> @@ -1950,6 +1950,30 @@ static int e100_rx_alloc_list(struct nic >> return 0; >> } >> >> +static int e100_change_context(int irq, void *dev_id, >> + enum change_context_cmd cmd) >> +{ >> + struct net_device *netdev = dev_id; >> + struct nic *nic = netdev_priv(netdev); >> + >> + switch(cmd) >> + { >> + case IRQ_TO_HARDIRQ: >> + if(!spin_mutexes_can_spin()) >> + return -ENOSYS; >> + >> + spin_mutex_to_spin(&nic->cmd_lock); >> + break; >> + case IRQ_CAN_THREAD: >> + /* Ok - return 0 */ >> + break; > > Why even bother with the IRQ_CAN_THREAD. If this would be anything > other than OK, then we shouldn't be using that request_irq2 (yuck!) call > in the first place. > Just for the sake of generality. Nothing else. It would be very unlikely, as you say, that it wouldn't return 0. > -- Steve Esben ^ permalink raw reply [flat|nested] 11+ messages in thread
* [patch 4/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime. [not found] <20060602165336.147812000@localhost> ` (2 preceding siblings ...) 2006-06-02 22:23 ` [patch 3/5] " Esben Nielsen @ 2006-06-02 22:23 ` Esben Nielsen 2006-06-03 21:30 ` Steven Rostedt 2006-06-02 22:23 ` [patch 5/5] " Esben Nielsen 4 siblings, 1 reply; 11+ messages in thread From: Esben Nielsen @ 2006-06-02 22:23 UTC (permalink / raw) To: linux-kernel; +Cc: Ingo Molnar Makes it possible for the 8250 serial driver to have it's interrupt handler in both hard-irq and threaded context under PREEMPT_RT. Index: linux-2.6.16-rt23.spin_mutex/drivers/serial/8250.c =================================================================== --- linux-2.6.16-rt23.spin_mutex.orig/drivers/serial/8250.c +++ linux-2.6.16-rt23.spin_mutex/drivers/serial/8250.c @@ -140,7 +140,7 @@ struct uart_8250_port { }; struct irq_info { - spinlock_t lock; + spin_mutex_t lock; struct list_head *head; }; @@ -1287,6 +1287,47 @@ serial8250_handle_port(struct uart_8250_ spin_unlock(&up->port.lock); } + +static int serial8250_change_context(int irq, void *dev_id, + enum change_context_cmd cmd) +{ + struct irq_info *i = dev_id; + struct list_head *l; + + switch(cmd) { + case IRQ_TO_HARDIRQ: + /* Spin mutexes not available: */ + if(!spin_mutexes_can_spin()) + return -ENOSYS; + + /* First do the inner locks */ + l = i->head; + do { + struct uart_8250_port *up; + up = list_entry(l, struct uart_8250_port, list); + spin_mutex_to_spin(&up->port.lock); + l = l->next; + } while(l != i->head && l != NULL); + spin_mutex_to_spin(&i->lock); + break; + case IRQ_CAN_THREAD: + break; + case IRQ_TO_THREADED: + spin_mutex_to_mutex(&i->lock); + l = i->head; + + do { + struct uart_8250_port *up; + up = list_entry(l, struct uart_8250_port, list); + spin_mutex_to_mutex(&up->port.lock); + l = l->next; + } while(l != i->head && l != NULL); + break; + } + + return 0; +} + /* * This is the serial driver's interrupt 
routine. * @@ -1393,8 +1434,10 @@ static int serial_link_irq_chain(struct i->head = &up->list; spin_unlock_irq(&i->lock); - ret = request_irq(up->port.irq, serial8250_interrupt, - irq_flags, "serial", i); + ret = request_irq2(up->port.irq, serial8250_interrupt, + irq_flags | SA_MUST_THREAD_RT, + "serial", i, + serial8250_change_context); if (ret < 0) serial_do_unlink(i, up); } Index: linux-2.6.16-rt23.spin_mutex/include/linux/serial_core.h =================================================================== --- linux-2.6.16-rt23.spin_mutex.orig/include/linux/serial_core.h +++ linux-2.6.16-rt23.spin_mutex/include/linux/serial_core.h @@ -206,7 +206,7 @@ struct uart_icount { typedef unsigned int __bitwise__ upf_t; struct uart_port { - spinlock_t lock; /* port lock */ + spin_mutex_t lock; /* port lock */ unsigned int iobase; /* in/out[bwl] */ unsigned char __iomem *membase; /* read/write[bwl] */ unsigned int irq; /* irq number */ -- ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [patch 4/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime. 2006-06-02 22:23 ` [patch 4/5] " Esben Nielsen @ 2006-06-03 21:30 ` Steven Rostedt 2006-06-04 22:50 ` Esben Nielsen 0 siblings, 1 reply; 11+ messages in thread From: Steven Rostedt @ 2006-06-03 21:30 UTC (permalink / raw) To: Esben Nielsen; +Cc: linux-kernel, Ingo Molnar On Fri, 2006-06-02 at 23:23 +0100, Esben Nielsen wrote: > Makes it possible for the 8250 serial driver to have it's interrupt handler > in both hard-irq and threaded context under PREEMPT_RT. > > Index: linux-2.6.16-rt23.spin_mutex/drivers/serial/8250.c > =================================================================== > --- linux-2.6.16-rt23.spin_mutex.orig/drivers/serial/8250.c > +++ linux-2.6.16-rt23.spin_mutex/drivers/serial/8250.c > @@ -140,7 +140,7 @@ struct uart_8250_port { > }; > > struct irq_info { > - spinlock_t lock; > + spin_mutex_t lock; > struct list_head *head; > }; > > @@ -1287,6 +1287,47 @@ serial8250_handle_port(struct uart_8250_ > spin_unlock(&up->port.lock); > } > > + > +static int serial8250_change_context(int irq, void *dev_id, > + enum change_context_cmd cmd) > +{ > + struct irq_info *i = dev_id; > + struct list_head *l; > + > + switch(cmd) { > + case IRQ_TO_HARDIRQ: > + /* Spin mutexes not available: */ > + if(!spin_mutexes_can_spin()) > + return -ENOSYS; > + > + /* First do the inner locks */ > + l = i->head; > + do { > + struct uart_8250_port *up; > + up = list_entry(l, struct uart_8250_port, list); > + spin_mutex_to_spin(&up->port.lock); > + l = l->next; > + } while(l != i->head && l != NULL); Now this shows the flaw in your whole design. Don't get me wrong, I'm very impressed that you got this working for you. But for this to be accepted, it can't be too intrusive. It is also very error prone since you must know not only the locks in the handlers, but also the locks in all the functions that you call. And each device must update them. 
Two things: 1) What if you have two serials, and one is threaded and one is not. It might mutex a lock that should be a spin. 2) You forgot to update sysrq_key_table_lock. That can be called from the serial interrupt as well. So even just working with two devices it's not trivial to get things right. Imagine what it would take for the whole kernel. Not to mention the maintenance nightmare it would cause. I do give you an A+ for effort, but I'm not sure if this is the solution. Frankly, it scares me, unless you can find a sure fire way to update all the locks properly, that would not cause problems elsewhere. Maybe the -rt way can introduce a new paradigm. Maybe we shouldn't have top halves at all, but instead a way to register threads to devices. Think about it, the big picture, what are interrupts for? They are to tell us that an event happened, and if there is nothing waiting for that event, then it was worthless. So really, what we could do (and this would perhaps also be good for mainline, although it would be very hard to get accepted) is to have registering functions for each device. So when a thread waits on a device, it is registered with the irq that the device has, instead of a wait queue. So each interrupt would have a priority sorted list of threads that are waiting for that interrupt. When the interrupt happens, the highest priority thread would wake up to handle it, instead of an interrupt handler. If the interrupt belonged to another thread, the one that was woken up would pass it off to the thread waiting and go back to sleep. This also allows the interrupt handlers to schedule and even write to user space, once it finds out that the thread working on the interrupt is the thread in question. Let's say that we have a network card, which are commonly shared with other devices, and it also has to do a lot before it knows what process it is working on for. And lets just say it is shared (uncommonly) with a IDE device. 
Now an interrupt comes in on this line. There's a list of all the highest priority tasks waiting on each device (not all tasks, just the highest for each device, like the pi_list in the rt code). So the highest priority tasks for each device is woken up (the highest for the network card and the highest for the IDE) and the interrupt handler would return (not unlike the irq threads of the current -rt, except, we wouldn't have irq threads but tasks that are waiting on them). These tasks would be in the registering function of the device, which would know how to handle the interrupt. Now if the current process is higher in priority, then it wont be preempted by this interrupt. But if a higher priority process is waiting, then the current process is preempted. So this is like interrupt handlers inheriting priorities of processes waiting. So the process now checks to see if the interrupt belongs to it, and if it does, then simply handle the case. Otherwise, if it does not belong to the device, simply go back to sleep and wait again. If this belongs to the device, but for another waiter, using our example, the network card, the process realized that it belonged to another process on another port, it could wake up that process and pass information on how much work the first process has done, so that the other process doesn't repeat the work. I'm just making this up as I go, but it really looks like this can be examined more in an academic arena. Perhaps it would even lead to less context switches, since the process waiting would already be woken up on interrupt. The above idea is based on what interrupts are for. They don't happen for no reasons, but there's almost always someone waiting for it. 
-- Steve > + spin_mutex_to_spin(&i->lock); > + break; > + case IRQ_CAN_THREAD: > + break; > + case IRQ_TO_THREADED: > + spin_mutex_to_mutex(&i->lock); > + l = i->head; > + > + do { > + struct uart_8250_port *up; > + up = list_entry(l, struct uart_8250_port, list); > + spin_mutex_to_mutex(&up->port.lock); > + l = l->next; > + } while(l != i->head && l != NULL); > + break; > + } > + > + return 0; > +} > + > /* > * This is the serial driver's interrupt routine. > * > @@ -1393,8 +1434,10 @@ static int serial_link_irq_chain(struct > i->head = &up->list; > spin_unlock_irq(&i->lock); > > - ret = request_irq(up->port.irq, serial8250_interrupt, > - irq_flags, "serial", i); > + ret = request_irq2(up->port.irq, serial8250_interrupt, > + irq_flags | SA_MUST_THREAD_RT, > + "serial", i, > + serial8250_change_context); > if (ret < 0) > serial_do_unlink(i, up); > } > Index: linux-2.6.16-rt23.spin_mutex/include/linux/serial_core.h > =================================================================== > --- linux-2.6.16-rt23.spin_mutex.orig/include/linux/serial_core.h > +++ linux-2.6.16-rt23.spin_mutex/include/linux/serial_core.h > @@ -206,7 +206,7 @@ struct uart_icount { > typedef unsigned int __bitwise__ upf_t; > > struct uart_port { > - spinlock_t lock; /* port lock */ > + spin_mutex_t lock; /* port lock */ > unsigned int iobase; /* in/out[bwl] */ > unsigned char __iomem *membase; /* read/write[bwl] */ > unsigned int irq; /* irq number */ > > -- > - > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ -- Steven Rostedt Senior Programmer Kihon Technologies (607)786-4830 ^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [patch 4/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime. 2006-06-03 21:30 ` Steven Rostedt @ 2006-06-04 22:50 ` Esben Nielsen 0 siblings, 0 replies; 11+ messages in thread From: Esben Nielsen @ 2006-06-04 22:50 UTC (permalink / raw) To: Steven Rostedt; +Cc: Esben Nielsen, linux-kernel, Ingo Molnar On Sat, 3 Jun 2006, Steven Rostedt wrote: > On Fri, 2006-06-02 at 23:23 +0100, Esben Nielsen wrote: >> Makes it possible for the 8250 serial driver to have it's interrupt handler >> in both hard-irq and threaded context under PREEMPT_RT. >> >> Index: linux-2.6.16-rt23.spin_mutex/drivers/serial/8250.c >> =================================================================== >> --- linux-2.6.16-rt23.spin_mutex.orig/drivers/serial/8250.c >> +++ linux-2.6.16-rt23.spin_mutex/drivers/serial/8250.c >> @@ -140,7 +140,7 @@ struct uart_8250_port { >> }; >> >> struct irq_info { >> - spinlock_t lock; >> + spin_mutex_t lock; >> struct list_head *head; >> }; >> >> @@ -1287,6 +1287,47 @@ serial8250_handle_port(struct uart_8250_ >> spin_unlock(&up->port.lock); >> } >> >> + >> +static int serial8250_change_context(int irq, void *dev_id, >> + enum change_context_cmd cmd) >> +{ >> + struct irq_info *i = dev_id; >> + struct list_head *l; >> + >> + switch(cmd) { >> + case IRQ_TO_HARDIRQ: >> + /* Spin mutexes not available: */ >> + if(!spin_mutexes_can_spin()) >> + return -ENOSYS; >> + >> + /* First do the inner locks */ >> + l = i->head; >> + do { >> + struct uart_8250_port *up; >> + up = list_entry(l, struct uart_8250_port, list); >> + spin_mutex_to_spin(&up->port.lock); >> + l = l->next; >> + } while(l != i->head && l != NULL); > > Now this shows the flaw in your whole design. Don't get me wrong, I'm > very impressed that you got this working for you. But for this to be > accepted, it can't be too intrusive. 
It is also very error prone since > you must know not only the locks in the handlers, but also the locks in > all the functions that you call. And each device must update them. > > Two things: > > 1) What if you have two serials, and one is threaded and one is not. It > might mutex a lock that should be a spin. > > 2) You forgot to update sysrq_key_table_lock. That can be called from > the serial interrupt as well. So even just working with two devices > it's not trivial to get things right. Imagine what it would take for > the whole kernel. Not to mention the maintenance nightmare it would > cause. I imagine the change would be done by those people using the drivers in a RT context. If they want to change a handler to run in hard-IRQ context they will have to identify all the locks anyway. Then can just as well do it this way and then submit it to the main-line kernel. If they just change them to raw_spin_lock_t it can't be submitted. I considered making a system where the right lock form could be chosen compiletime. Then the drivers would have an suboption "threaded irq" or "hard irq" and from that choice the right lock types where chosen. But then came the problem up on the list about shared interrupts. Then the ability to change the context runtime would be usefull. > > I do give you an A+ for effort, but I'm not sure if this is the > solution. Frankly, it scares me, unless you can find a sure fire way to > update all the locks properly, that would not cause problems elsewhere. Well, you will very soon hit a BUG: scheduling while in atomic if you miss some. Ofcourse, I did miss some, and I didn't get that because I didn't use that subsystem in my tests. > > Maybe the -rt way can introduce a new paradigm. Maybe we shouldn't have > top halves at all, but instead a way to register threads to devices. > Think about it, the big picture, what are interrupts for? 
They are to > tell us that an event happened, and if there is nothing waiting for that > event, then it was worthless. So really, what we could do (and this > would perhaps also be good for mainline, although it would be very hard > to get accepted) is to have registering functions for each device. > > So when a thread waits on a device, it is registered with the irq that > the device has, instead of a wait queue. So each interrupt would have a > priority sorted list of threads that are waiting for that interrupt. > When the interrupt happens, the highest priority thread would wake up to > handle it, instead of an interrupt handler. If the interrupt belonged to > another thread, the one that was woken up would pass it off to the > thread waiting and go back to sleep. > > This also allows the interrupt handlers to schedule and even write to > user space, once it finds out that the thread working on the interrupt > is the thread in question. > > Let's say that we have a network card, which are commonly shared with > other devices, and it also has to do a lot before it knows what process > it is working on for. And lets just say it is shared (uncommonly) with > a IDE device. > > Now an interrupt comes in on this line. There's a list of all the > highest priority tasks waiting on each device (not all tasks, just the > highest for each device, like the pi_list in the rt code). So the > highest priority tasks for each device is woken up (the highest for the > network card and the highest for the IDE) and the interrupt handler > would return (not unlike the irq threads of the current -rt, except, we > wouldn't have irq threads but tasks that are waiting on them). These > tasks would be in the registering function of the device, which would > know how to handle the interrupt. Now if the current process is higher > in priority, then it wont be preempted by this interrupt. But if a > higher priority process is waiting, then the current process is > preempted. 
So this is like interrupt handlers inheriting priorities of > processes waiting. > > So the process now checks to see if the interrupt belongs to it, and if > it does, then simply handle the case. Otherwise, if it does not belong > to the device, simply go back to sleep and wait again. If this belongs > to the device, but for another waiter, using our example, the network > card, the process realized that it belonged to another process on > another port, it could wake up that process and pass information on how > much work the first process has done, so that the other process doesn't > repeat the work. > > I'm just making this up as I go, but it really looks like this can be > examined more in an academic arena. Perhaps it would even lead to less > context switches, since the process waiting would already be woken up on > interrupt. The above idea is based on what interrupts are for. They > don't happen for no reasons, but there's almost always someone waiting > for it. > I must say I am not sure how this idea _really_ differs from the present setup, but then again, I don't really understand where you are going. The only major difference is that every "interrupt handler" runs in it's own thread even though they share the same interrupt line. That again leads to the usual problem: When should the interrupt be reenabled? As long as interrupts can be shared, all handlers of the same interrupt has to be handled before the it can be reinabled. Sure, you can sort it out in a way such that the high priority handler runs first, but the worst case lantency is still set by the lower priority handler, because the system simply can't reenable the interrupt after a handler has handled it. So if the high priority event happens just after the low priority one, but after the high priority handler has run, the system has to wait for the low priority handler to handle it's work, reenable the interrupt, see the high priority event and then call that handler. 
I can't see how any way of splitting it up into threads is going to solve this problem. Actually, having them in one thread makes more sense, because when the latency will depend on the lowest priority anyway they all should have the same priority. > -- Steve If you really want to change stuff I have had another idea: Make a pool of non-running threads. In the low level assembly code handling interrupts, take one out of the pool and move the stack-pointer to the bottom of the stack. Now jump to the generic irq system using that stack instead of the stack of the task that was running when the interrupt occurred. If an interrupt handler wants to run preemptible: switch on interrupts and let the scheduler handle the rest. That way there isn't really any task switch, no wait queue or other "expensive" things to get the interrupts preemptible. The only thing I can think of being expensive is that you need to fix up the stack of the task that was running when the interrupt occurred, such that a schedule() can pick it up cleanly and take the interrupt thread in and out of the run queue - but that might even be deferred to the case where it in fact is preempted. By doing that we might get rid of hard-irq context altogether, as the interrupt gets "threaded" already within the low level generic code (although the interrupts will still be off at that point).
>> * >> @@ -1393,8 +1434,10 @@ static int serial_link_irq_chain(struct >> i->head = &up->list; >> spin_unlock_irq(&i->lock); >> >> - ret = request_irq(up->port.irq, serial8250_interrupt, >> - irq_flags, "serial", i); >> + ret = request_irq2(up->port.irq, serial8250_interrupt, >> + irq_flags | SA_MUST_THREAD_RT, >> + "serial", i, >> + serial8250_change_context); >> if (ret < 0) >> serial_do_unlink(i, up); >> } >> Index: linux-2.6.16-rt23.spin_mutex/include/linux/serial_core.h >> =================================================================== >> --- linux-2.6.16-rt23.spin_mutex.orig/include/linux/serial_core.h >> +++ linux-2.6.16-rt23.spin_mutex/include/linux/serial_core.h >> @@ -206,7 +206,7 @@ struct uart_icount { >> typedef unsigned int __bitwise__ upf_t; >> >> struct uart_port { >> - spinlock_t lock; /* port lock */ >> + spin_mutex_t lock; /* port lock */ >> unsigned int iobase; /* in/out[bwl] */ >> unsigned char __iomem *membase; /* read/write[bwl] */ >> unsigned int irq; /* irq number */ >> >> -- >> - >> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html >> Please read the FAQ at http://www.tux.org/lkml/ > -- > Steven Rostedt > Senior Programmer > Kihon Technologies > (607)786-4830 > ^ permalink raw reply [flat|nested] 11+ messages in thread
* [patch 5/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime. [not found] <20060602165336.147812000@localhost> ` (3 preceding siblings ...) 2006-06-02 22:23 ` [patch 4/5] " Esben Nielsen @ 2006-06-02 22:23 ` Esben Nielsen 4 siblings, 0 replies; 11+ messages in thread From: Esben Nielsen @ 2006-06-02 22:23 UTC (permalink / raw) To: linux-kernel; +Cc: Ingo Molnar Make lpptest more compliant with the request_irq() interface. I don't think the driver should set or remove flags on it's own! Index: linux-2.6.16-rt23.spin_mutex/drivers/char/lpptest.c =================================================================== --- linux-2.6.16-rt23.spin_mutex.orig/drivers/char/lpptest.c +++ linux-2.6.16-rt23.spin_mutex/drivers/char/lpptest.c @@ -150,13 +150,13 @@ static int __init lpptest_init (void) return -EAGAIN; } - if (request_irq (LPPTEST_IRQ, lpptest_irq, 0, "lpptest", dev_id)) { - printk (KERN_WARNING "lpptest: irq %d in use. Unload parport module!\n", LPPTEST_IRQ); + if (request_irq (LPPTEST_IRQ, lpptest_irq, SA_NODELAY | SA_INTERRUPT, + "lpptest", dev_id)) { + printk (KERN_WARNING "lpptest: irq %d in use. " + "Unload parport module!\n", LPPTEST_IRQ); unregister_chrdev(LPPTEST_CHAR_MAJOR, LPPTEST_DEVICE_NAME); return -EAGAIN; } - irq_desc[LPPTEST_IRQ].status |= IRQ_NODELAY; - irq_desc[LPPTEST_IRQ].action->flags |= SA_NODELAY | SA_INTERRUPT; INIT_PORT(); ENABLE_IRQ(); -- ^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2006-06-04 21:49 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20060602165336.147812000@localhost>
2006-06-02 22:23 ` [patch 1/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime Esben Nielsen
2006-06-02 22:23 ` [patch 2/5] " Esben Nielsen
2006-06-03 20:21 ` Steven Rostedt
2006-06-04 17:33 ` Esben Nielsen
2006-06-02 22:23 ` [patch 3/5] " Esben Nielsen
2006-06-03 20:39 ` Steven Rostedt
2006-06-04 17:34 ` Esben Nielsen
2006-06-02 22:23 ` [patch 4/5] " Esben Nielsen
2006-06-03 21:30 ` Steven Rostedt
2006-06-04 22:50 ` Esben Nielsen
2006-06-02 22:23 ` [patch 5/5] " Esben Nielsen
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox