public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [patch 1/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime.
       [not found] <20060602165336.147812000@localhost>
@ 2006-06-02 22:23 ` Esben Nielsen
  2006-06-02 22:23 ` [patch 2/5] " Esben Nielsen
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Esben Nielsen @ 2006-06-02 22:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar

This patch contains a new lock type based which can be both a raw_spin_lock
and a rt_lock (a rt_mutex basically). The type can be changed at _runtime_. 
spin_mutex_t is a direct drop-in for spinlock_t. Under PREEMPT_RT it works as 
a rt_lock to begin with otherwise it works as a raw_spin_lock.

The behavior can be changed with the functions

void spin_mutex_to_mutex(spin_mutex_t *lock);
void spin_mutex_to_spin(spin_mutex_t *lock);

These functions can block because they have to be sure they are the owner of
rt_lock before doing the change-over.

Index: linux-2.6.16-rt25/include/linux/spin_mutex.h
===================================================================
--- /dev/null
+++ linux-2.6.16-rt25/include/linux/spin_mutex.h
@@ -0,0 +1,116 @@
+/*
+ * Locks which runtime can be changed from spin lock to mutex and back again
+ *
+ * Copyright (c) 2006 Esben Nielsen
+ *
+ */
+
+#ifndef __LINUX_SPIN_MUTEX_H
+#define __LINUX_SPIN_MUTEX_H
+
+#ifndef CONFIG_SPIN_MUTEXES
+#include <linux/spinlock.h>
+
+/*
+ * A spin_mutex_t is now just a spinlock_t, which can be a raw_spinlock_t or
+ * a rt_mutex_t.
+ * The macros for locking picks up the right operations before picking up the
+ * specific spin_mutex_t operations, which will now be bad operations
+ */
+
+typedef spinlock_t spin_mutex_t;
+
+#define spin_mutex_lock_init(lock,lockname,file,line) __bad_spinlock_type()
+#define spin_mutex_lock(lock) __bad_spinlock_type()
+#define spin_mutex_trylock(lock) __bad_spinlock_type()
+#define spin_mutex_unlock(lock) __bad_spinlock_type()
+#define spin_mutex_lock_bh(lock) __bad_spinlock_type()
+#define spin_mutex_unlock_bh(lock) __bad_spinlock_type()
+#define spin_mutex_lock_irq(lock) __bad_spinlock_type()
+#define spin_mutex_unlock_irq(lock) __bad_spinlock_type()
+#define spin_mutex_lock_irqsave(lock) __bad_spinlock_type()
+#define spin_mutex_trylock_irqsave(lock, flags) __bad_spinlock_type()
+#define spin_mutex_unlock_irqrestore(lock, flags) __bad_spinlock_type()
+#define spin_mutex_unlock_no_resched(lock) __bad_spinlock_type()
+#define spin_mutex_unlock_wait(lock) __bad_spinlock_type()
+#define spin_mutex_is_locked(lock) __bad_spinlock_type()
+
+static inline void spin_mutex_to_mutex(spin_mutex_t *lock)
+{
+}
+
+static inline void spin_mutex_to_spin(spin_mutex_t *lock)
+{
+}
+
+static inline int spin_mutexes_can_spin(void)
+{
+#ifdef CONFIG_PREEMPT_RT
+	return 0;
+#else
+	return 1;
+#endif
+}
+
+#else /* CONFIG_SPIN_MUTEXES */
+
+
+#include <linux/rtmutex.h>
+
+enum spin_mutex_state {
+	SPIN_MUTEX_SPIN,
+	SPIN_MUTEX_MUTEX
+};
+
+struct spin_mutex {
+	/* The state variable is protected by the mutex or
+	   mutex.wait_lock depending on its value */
+	enum spin_mutex_state state;
+	struct rt_mutex mutex;
+};
+
+typedef struct spin_mutex spin_mutex_t;
+
+#define __SPIN_MUTEX_INITIALIZER(mutexname,s) \
+	{ .state = (s),  \
+          .mutex = (struct rt_mutex)__RT_MUTEX_INITIALIZER(mutexname) }
+
+#define DEFINE_SPIN_MUTEX(mutexname,s) \
+	spin_mutex_t mutexname = __SPIN_MUTEX_INITIALIZER(mutexname,s)
+
+void spin_mutex_lock_init(spin_mutex_t *lock,
+			  const char *lockname,
+			  const char *file,
+			  int line);
+/* May block, depending on state */
+void spin_mutex_lock(spin_mutex_t *lock);
+int spin_mutex_trylock(spin_mutex_t *lock);
+void spin_mutex_unlock(spin_mutex_t *lock);
+void spin_mutex_unlock_no_resched(spin_mutex_t *lock);
+void spin_mutex_unlock_wait(spin_mutex_t *lock);
+
+void spin_mutex_lock_irq(spin_mutex_t *lock);
+int spin_mutex_trylock_irq(spin_mutex_t *lock);
+void spin_mutex_unlock_irq(spin_mutex_t *lock);
+
+unsigned long spin_mutex_lock_bh(spin_mutex_t *lock);
+int spin_mutex_trylock_bh(spin_mutex_t *lock);
+void spin_mutex_unlock_bh(spin_mutex_t *lock);
+
+int spin_mutex_is_locked(spin_mutex_t *lock);
+
+unsigned long spin_mutex_lock_irqsave(spin_mutex_t *lock);
+int spin_mutex_trylock_irqsave(spin_mutex_t *lock, unsigned long *flags);
+void spin_mutex_unlock_irqrestore(spin_mutex_t *lock, unsigned long flags);
+
+static inline int spin_mutexes_can_spin(void)
+{
+	return 1;
+}
+
+/* Blocks until the lock is converted */
+void spin_mutex_to_mutex(spin_mutex_t *lock);
+void spin_mutex_to_spin(spin_mutex_t *lock);
+
+#endif /* CONFIG_SPIN_MUTEXES */
+#endif
Index: linux-2.6.16-rt25/include/linux/spinlock.h
===================================================================
--- linux-2.6.16-rt25.orig/include/linux/spinlock.h
+++ linux-2.6.16-rt25/include/linux/spinlock.h
@@ -209,6 +209,8 @@ do {								\
  		_raw_##optype##op((type *)(lock));		\
  	else if (TYPE_EQUAL(lock, spinlock_t))			\
  		_spin##op((spinlock_t *)(lock));		\
+	else if (TYPE_EQUAL(lock, spin_mutex_t))       		\
+		spin_mutex##op((spin_mutex_t *)(lock));		\
  	else __bad_spinlock_type();				\
  } while (0)

@@ -220,6 +222,8 @@ do {								\
  		__ret = _raw_##optype##op((type *)(lock));	\
  	else if (TYPE_EQUAL(lock, spinlock_t))			\
  		__ret = _spin##op((spinlock_t *)(lock));	\
+	else if (TYPE_EQUAL(lock, spin_mutex_t))		\
+		__ret = spin_mutex##op((spin_mutex_t *)(lock));	\
  	else __ret = __bad_spinlock_type();			\
  								\
  	__ret;							\
@@ -231,6 +235,8 @@ do {								\
  		_raw_##optype##op((type *)(lock), flags);	\
  	else if (TYPE_EQUAL(lock, spinlock_t))			\
  		_spin##op((spinlock_t *)(lock), flags);		\
+	else if (TYPE_EQUAL(lock, spin_mutex_t))	       	\
+		spin_mutex##op((spin_mutex_t *)(lock), flags);	\
  	else __bad_spinlock_type();				\
  } while (0)

@@ -242,6 +248,8 @@ do {								\
  		__ret = _raw_##optype##op((type *)(lock), flags);\
  	else if (TYPE_EQUAL(lock, spinlock_t))			\
  		__ret = _spin##op((spinlock_t *)(lock), flags);	\
+	else if (TYPE_EQUAL(lock, spin_mutex_t))       		\
+		__ret = spin_mutex##op((spin_mutex_t *)(lock), flags);\
  	else __bad_spinlock_type();				\
  								\
  	__ret;							\
@@ -347,6 +355,8 @@ do {									\
  		_raw_##optype##op((type *)(lock));			\
  	else if (TYPE_EQUAL(lock, spinlock_t))				\
  		_spin##op((spinlock_t *)(lock), #lock, __FILE__, __LINE__); \
+	else if (TYPE_EQUAL(lock, spin_mutex_t))		       	\
+		spin_mutex##op((spin_mutex_t *)(lock), #lock, __FILE__, __LINE__); \
  	else __bad_spinlock_type();					\
  } while (0)

@@ -579,5 +589,7 @@ static inline int bit_spin_is_locked(int
   */
  #define __raw_spin_can_lock(lock)            (!__raw_spin_is_locked(lock))

+#include <linux/spin_mutex.h>
+
  #endif /* __LINUX_SPINLOCK_H */

Index: linux-2.6.16-rt25/init/Kconfig
===================================================================
--- linux-2.6.16-rt25.orig/init/Kconfig
+++ linux-2.6.16-rt25/init/Kconfig
@@ -332,6 +332,10 @@ config RT_MUTEXES
  	boolean
  	select PLIST

+config SPIN_MUTEXES
+	bool "Locks which can be changed between spinlocks and mutexes runtime."
+	select RT_MUTEXES
+
  config FUTEX
  	bool "Enable futex support" if EMBEDDED
  	default y
Index: linux-2.6.16-rt25/kernel/Makefile
===================================================================
--- linux-2.6.16-rt25.orig/kernel/Makefile
+++ linux-2.6.16-rt25/kernel/Makefile
@@ -19,6 +19,7 @@ ifeq ($(CONFIG_COMPAT),y)
  obj-$(CONFIG_FUTEX) += futex_compat.o
  endif
  obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
+obj-$(CONFIG_SPIN_MUTEXES) += spin_mutex.o
  obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
  obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
  obj-$(CONFIG_PREEMPT_RT) += rt.o
Index: linux-2.6.16-rt25/kernel/spin_mutex.c
===================================================================
--- /dev/null
+++ linux-2.6.16-rt25/kernel/spin_mutex.c
@@ -0,0 +1,153 @@
+#include <linux/spin_mutex.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+#include "rtmutex_common.h"
+
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+# include "rtmutex-debug.h"
+#else
+# include "rtmutex.h"
+#endif
+
+
+void spin_mutex_lock_init(struct spin_mutex *lock,
+			  const char *name,
+			  const char *file,
+			  int line)
+{
+#ifdef CONFIG_PREEMPT_RT
+	lock->state = SPIN_MUTEX_MUTEX;
+#else
+	lock->state = SPIN_MUTEX_SPIN;
+#endif
+	rt_mutex_init(&lock->mutex);
+}
+
+static inline int spin_mutex_lock_common(struct spin_mutex *lock,
+					 unsigned long *flags, int try)
+{
+ retry:
+	switch(lock->state) {
+	case SPIN_MUTEX_SPIN:
+		if (try) {
+			if (!spin_trylock_irqsave(&lock->mutex.wait_lock,
+						 *flags))
+				return 0;
+		}
+		else
+			spin_lock_irqsave(&lock->mutex.wait_lock, *flags);
+
+		if (unlikely(lock->state != SPIN_MUTEX_SPIN)) {
+			spin_unlock_irqrestore(&lock->mutex.wait_lock, *flags);
+			goto retry;
+		}
+		return 1;
+	case SPIN_MUTEX_MUTEX:
+		*flags = 0;
+		if (try) {
+			if (!rt_mutex_trylock(&lock->mutex))
+				return 0;
+		}
+		else
+			rt_lock(&lock->mutex);
+
+		if (unlikely(lock->state != SPIN_MUTEX_MUTEX)) {
+			rt_unlock(&lock->mutex);
+			goto retry;
+		}
+		return 1;
+	}
+	BUG_ON(1);
+}
+
+
+unsigned long spin_mutex_lock_irqsave(struct spin_mutex *lock)
+{
+	unsigned long flags;
+	spin_mutex_lock_common(lock, &flags, 0);
+
+	return flags;
+}
+
+void spin_mutex_lock_irq(struct spin_mutex *lock)
+{
+	unsigned long flags;
+	spin_mutex_lock_common(lock, &flags, 0);
+}
+
+void spin_mutex_lock(struct spin_mutex *lock)
+{
+	unsigned long flags;
+	spin_mutex_lock_common(lock, &flags, 0);
+}
+
+
+int spin_mutex_trylock_irqsave(struct spin_mutex *lock,
+			       unsigned long *flags)
+{
+	return spin_mutex_lock_common(lock, flags, 1);
+}
+
+void spin_mutex_unlock_irqrestore(struct spin_mutex *lock, unsigned long flags)
+{
+	switch(lock->state) {
+	case SPIN_MUTEX_SPIN:
+		spin_unlock_irqrestore(&lock->mutex.wait_lock, flags);
+		break;
+	case SPIN_MUTEX_MUTEX:
+		rt_unlock(&lock->mutex);
+		break;
+	}
+}
+
+void spin_mutex_unlock_irq(struct spin_mutex *lock)
+{
+	switch(lock->state) {
+	case SPIN_MUTEX_SPIN:
+		spin_unlock_irq(&lock->mutex.wait_lock);
+		break;
+	case SPIN_MUTEX_MUTEX:
+		rt_unlock(&lock->mutex);
+		break;
+	}
+}
+
+void spin_mutex_unlock(struct spin_mutex *lock)
+{
+	switch(lock->state) {
+	case SPIN_MUTEX_SPIN:
+		spin_unlock(&lock->mutex.wait_lock);
+		break;
+	case SPIN_MUTEX_MUTEX:
+		rt_unlock(&lock->mutex);
+		break;
+	}
+}
+
+void spin_mutex_to_mutex(struct spin_mutex *lock)
+{
+	unsigned long flags;
+
+	rt_lock(&lock->mutex);
+	spin_lock_irqsave(&lock->mutex.wait_lock,flags);
+
+	lock->state = SPIN_MUTEX_MUTEX;
+
+	spin_unlock_irqrestore(&lock->mutex.wait_lock,flags);
+	rt_unlock(&lock->mutex);
+}
+
+/* Blocks until converted */
+void spin_mutex_to_spin(struct spin_mutex *lock)
+{
+	unsigned long flags;
+
+	rt_lock(&lock->mutex);
+	spin_lock_irqsave(&lock->mutex.wait_lock,flags);
+
+	lock->state = SPIN_MUTEX_SPIN;
+
+	spin_unlock_irqrestore(&lock->mutex.wait_lock,flags);
+	rt_unlock(&lock->mutex);
+}
Index: linux-2.6.16-rt25/kernel/rtmutex.c
===================================================================
--- linux-2.6.16-rt25.orig/kernel/rtmutex.c
+++ linux-2.6.16-rt25/kernel/rtmutex.c
@@ -585,7 +585,7 @@ static void remove_waiter(struct rt_mute
  	spin_lock(&lock->wait_lock);
  }

-#ifdef CONFIG_PREEMPT_RT
+#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_SPIN_MUTEXES)

  static inline void
  rt_lock_fastlock(struct rt_mutex *lock,

--

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [patch 2/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime.
       [not found] <20060602165336.147812000@localhost>
  2006-06-02 22:23 ` [patch 1/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime Esben Nielsen
@ 2006-06-02 22:23 ` Esben Nielsen
  2006-06-03 20:21   ` Steven Rostedt
  2006-06-02 22:23 ` [patch 3/5] " Esben Nielsen
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 11+ messages in thread
From: Esben Nielsen @ 2006-06-02 22:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar

This patch makes it possible to change which context the interrupt handlers
for each interrupt run in, hard-irq or threaded if CONFIG_PREEMPT_HARDIRQS is 
set.

The interface is the file
  /proc/irq/<irq number>/threaded
You can read it to see what the context is now or write one of

  irq
  fifo <rt priority>
  rr <rt priority>
  normal <nice value>
  batch <nice value>

where one of the latter makes the interrupt handler threaded.

A replacement for request_irq(), called request_irq2(), is added. When a driver
uses this to install its irq-handler it can also give a change_irq_context
call-back. This call-back is called whenever the irq-context is changed or 
going to be changed. The call-back must be of the form

int driver_change_context(int irq, void *dev_id, enum change_context_cmd cmd)

where

enum change_context_cmd {
 	IRQ_TO_HARDIRQ,
 	IRQ_CAN_THREAD,
 	IRQ_TO_THREADED
};

The call-back is supposed to do the following on
  IRQ_TO_HARDIRQ: make sure everything in the interrupt handler is non-blocking
                  or return a non-zero error code.
  IRQ_CAN_THREAD: Return 0 if it is ok for the interrupt handler to be run in
                  thread context. Return a non-zero error code otherwise.
  IRQ_TO_THREADED: Now the interrupt handler does run in thread context. The
                 driver can now change its locks to being mutexes. The return
                 value is ignored as the driver already got a chance to protest
                 above.

Index: linux-2.6.16-rt23.spin_mutex/include/linux/interrupt.h
===================================================================
--- linux-2.6.16-rt23.spin_mutex.orig/include/linux/interrupt.h
+++ linux-2.6.16-rt23.spin_mutex/include/linux/interrupt.h
@@ -34,21 +34,38 @@ typedef int irqreturn_t;
  #define IRQ_HANDLED	(1)
  #define IRQ_RETVAL(x)	((x) != 0)

+enum change_context_cmd {
+	IRQ_TO_HARDIRQ,
+	IRQ_CAN_THREAD,
+	IRQ_TO_THREADED
+};
+
  struct irqaction {
  	irqreturn_t (*handler)(int, void *, struct pt_regs *);
  	unsigned long flags;
  	cpumask_t mask;
  	const char *name;
  	void *dev_id;
+#ifdef CONFIG_CHANGE_IRQ_CONTEXT
+	int (*change_context)(int, void *,
+			      enum change_context_cmd);
+#endif
  	struct irqaction *next;
  	int irq;
-	struct proc_dir_entry *dir, *threaded;
+	struct proc_dir_entry *dir;
+	struct rcu_head rcu;
  };

  extern irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs);
  extern int request_irq(unsigned int,
  		       irqreturn_t (*handler)(int, void *, struct pt_regs *),
  		       unsigned long, const char *, void *);
+extern int request_irq2(unsigned int irq,
+			irqreturn_t (*handler)(int, void *, struct pt_regs *),
+			unsigned long irqflags, const char * devname,
+			void *dev_id,
+			int (*change_context)(int, void *,
+					      enum change_context_cmd));
  extern void free_irq(unsigned int, void *);


Index: linux-2.6.16-rt23.spin_mutex/include/linux/irq.h
===================================================================
--- linux-2.6.16-rt23.spin_mutex.orig/include/linux/irq.h
+++ linux-2.6.16-rt23.spin_mutex/include/linux/irq.h
@@ -47,6 +47,18 @@
  # define SA_NODELAY 0x01000000
  #endif

+#ifndef SA_MUST_THREAD
+# define SA_MUST_THREAD 0x02000000
+#endif
+
+/* Set this flag if the irq handler must thread under preempt-rt otherwise not
+ */
+#ifdef CONFIG_PREEMPT_RT
+# define SA_MUST_THREAD_RT SA_MUST_THREAD
+#else
+# define SA_MUST_THREAD_RT 0
+#endif
+
  /*
   * IRQ types
   */
@@ -147,12 +159,13 @@ struct irq_type {
   * @chip:		low level hardware access functions - comes from type
   * @action:		the irq action chain
   * @status:		status information
+ *                      (protected by the spinlock )
   * @depth:		disable-depth, for nested irq_disable() calls
   * @irq_count:		stats field to detect stalled irqs
   * @irqs_unhandled:	stats field for spurious unhandled interrupts
   * @thread:		Thread pointer for threaded preemptible irq handling
   * @wait_for_handler:	Waitqueue to wait for a running preemptible handler
- * @lock:		locking for SMP
+ * @lock:		lock around the action list
   * @move_irq:		Flag need to re-target interrupt destination
   *
   * Pad this out to 32 bytes for cache and indexing reasons.
Index: linux-2.6.16-rt23.spin_mutex/kernel/Kconfig.preempt
===================================================================
--- linux-2.6.16-rt23.spin_mutex.orig/kernel/Kconfig.preempt
+++ linux-2.6.16-rt23.spin_mutex/kernel/Kconfig.preempt
@@ -119,6 +119,17 @@ config PREEMPT_HARDIRQS

  	  Say N if you are unsure.

+config CHANGE_IRQ_CONTEXT
+	bool "Change the irq context runtime"
+	depends on PREEMPT_HARDIRQS && (PREEMPT_RCU || !SPIN_MUTEXES)
+	help
+	  You can change whether the IRQ handler(s) for each IRQ number is
+          running in hardirq context or as threaded by writing to
+          /proc/irq/<number>/threaded
+          If PREEMPT_RT is selected the drivers involved must be ready for it,
+          though, or the write will fail. Remember to switch on SPIN_MUTEXES as
+          well in that case as the drivers which are ready uses spin-mutexes.
+
  config SPINLOCK_BKL
  	bool "Old-Style Big Kernel Lock"
  	depends on (PREEMPT || SMP) && !PREEMPT_RT
Index: linux-2.6.16-rt23.spin_mutex/kernel/irq/internals.h
===================================================================
--- linux-2.6.16-rt23.spin_mutex.orig/kernel/irq/internals.h
+++ linux-2.6.16-rt23.spin_mutex/kernel/irq/internals.h
@@ -22,6 +22,10 @@ static inline void end_irq(irq_desc_t *d
  }

  #ifdef CONFIG_PROC_FS
+#ifdef CONFIG_CHANGE_IRQ_CONTEXT
+extern int make_irq_nodelay(int irq, struct irq_desc *desc);
+extern int make_irq_threaded(int irq, struct irq_desc *desc);
+#endif
  extern void register_irq_proc(unsigned int irq);
  extern void register_handler_proc(unsigned int irq, struct irqaction *action);
  extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
Index: linux-2.6.16-rt23.spin_mutex/kernel/irq/manage.c
===================================================================
--- linux-2.6.16-rt23.spin_mutex.orig/kernel/irq/manage.c
+++ linux-2.6.16-rt23.spin_mutex/kernel/irq/manage.c
@@ -206,6 +206,153 @@ int can_request_irq(unsigned int irq, un
  	return !action;
  }

+
+#ifdef CONFIG_CHANGE_IRQ_CONTEXT
+int make_action_nodelay(int irq, struct irqaction *act)
+{
+	unsigned long flags;
+
+	if(!(act->flags & SA_MUST_THREAD)) {
+		return 0;
+	}
+
+	if( act->change_context ) {
+		int ret =
+			act->change_context(irq, act->dev_id, IRQ_TO_HARDIRQ);
+		if(ret) {
+			printk(KERN_ERR "Failed to change irq handler "
+			       "for dev %s on IRQ%d to hardirq "
+			       "context (ret: %d)\n", act->name, irq, ret);
+			return ret;
+		}
+		spin_lock_irqsave(&irq_desc[irq].lock, flags);
+		act->flags &=~SA_MUST_THREAD;
+		act->flags |= SA_NODELAY;
+		spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
+		return 0;
+	}
+	else {
+		printk(KERN_ERR "Irq handler "
+		       "for dev %s on IRQ%d can not be changed"
+		       " to hardirq context\n", act->name, irq);
+		return -ENOSYS;
+	}
+
+}
+
+
+static int __make_irq_threaded(int irq, struct irq_desc *desc);
+
+int make_irq_nodelay(int irq, struct irq_desc *desc)
+{
+	int ret = 0;
+	struct irqaction *act;
+	unsigned long flags;
+
+	rcu_read_lock();
+	for(act = desc->action; act; act = act->next) {
+		WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0);
+		if(act->flags & SA_MUST_THREAD) {
+			ret = make_action_nodelay(irq, act);
+			if(ret) {
+				printk(KERN_ERR "Could not make irq %d "
+				       "nodelay (errno %d)\n",
+				       irq, ret);
+				goto failed;
+			}
+		}
+	}
+	spin_lock_irqsave(&desc->lock, flags);
+	desc->status |= IRQ_NODELAY;
+	spin_unlock_irqrestore(&desc->lock, flags);
+	rcu_read_unlock();
+
+	return 0;
+ failed:
+	__make_irq_threaded(irq, desc);
+	rcu_read_unlock();
+	return ret;
+}
+
+int action_may_thread(int irq, struct irqaction *act)
+{
+	if(!act->change_context)
+		return !(act->flags & SA_NODELAY);
+
+	return act->change_context(irq, act->dev_id, IRQ_CAN_THREAD) == 0;
+}
+
+
+static int __make_irq_threaded(int irq, struct irq_desc *desc)
+{
+	struct irqaction *act;
+
+	for(act = desc->action; act; act = act->next) {
+		WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0);
+		if(!action_may_thread(irq, act)) {
+			return -EINVAL;
+		}
+	}
+
+	if (start_irq_thread(irq, desc))
+		return -ENOMEM;
+
+	spin_lock_irq(&desc->lock);
+	desc->status &= ~IRQ_NODELAY;
+	spin_unlock_irq(&desc->lock);
+
+	/* We can't make irq handlers change their context before we
+	   are sure no CPU is running them in hard irq context */
+	synchronize_irq(irq);
+
+	for(act = desc->action; act; act = act->next) {
+		WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0);
+		if(act->change_context) {
+			/* This callback can't fail */
+			act->change_context(irq, act->dev_id, IRQ_TO_THREADED);
+			spin_lock_irq(&desc->lock);
+			act->flags &=~SA_NODELAY;
+			act->flags |= SA_MUST_THREAD;
+			spin_unlock_irq(&desc->lock);
+		}
+	}
+
+	return 0;
+}
+
+int make_irq_threaded(int irq, struct irq_desc *desc)
+{
+	int ret;
+	rcu_read_lock();
+	ret = __make_irq_threaded(irq, desc);
+	rcu_read_unlock();
+	return ret;
+}
+#else /* CONFIG_CHANGE_IRQ_CONTEXT */
+int make_action_nodelay(int irq, struct irqaction *act)
+{
+	return -EINVAL;
+}
+int make_irq_nodelay(int irq, struct irq_desc *desc)
+{
+	unsigned long flags;
+	struct irqaction *act;
+
+	spin_lock_irqsave(&desc->lock, flags);
+	for(act = desc->action; act; act = act->next) {
+		WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0);
+		if(act->flags & SA_MUST_THREAD) {
+			spin_unlock_irqrestore(&desc->lock, flags); return -EINVAL; }
+	}
+
+	desc->status |= IRQ_NODELAY;
+	spin_unlock_irqrestore(&desc->lock, flags);
+
+	return 0;
+}
+
+#endif /* !CONFIG_CHANGE_IRQ_CONTEXT */
+
  /*
   * Internal function to register an irqaction - typically used to
   * allocate special interrupts that are part of the architecture.
@@ -239,11 +386,28 @@ int setup_irq(unsigned int irq, struct i
  		rand_initialize_irq(irq);
  	}

+	WARN_ON(((~new->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0);
  	if (!(new->flags & SA_NODELAY))
  		if (start_irq_thread(irq, desc))
  			return -ENOMEM;
+
+	if( (desc->status & IRQ_NODELAY) && (new->flags & SA_MUST_THREAD) ) {
+		int ret = make_action_nodelay(irq, new);
+		if(ret)
+			return ret;
+	}
+
+
+	if( (new->flags & SA_NODELAY) && !(desc->status & IRQ_NODELAY) ) {
+		int ret = make_irq_nodelay(irq, desc);
+		if(ret)
+			return ret;
+	}
+
+
  	/*
-	 * The following block of code has to be executed atomically
+	 * The following block of code has to be executed atomically due
+	 * to the hard irq context
  	 */
  	spin_lock_irqsave(&desc->lock, flags);
  	p = &desc->action;
@@ -262,7 +426,7 @@ int setup_irq(unsigned int irq, struct i
  		shared = 1;
  	}

-	*p = new;
+	rcu_assign_pointer(*p, new);

  	/*
  	 * Propagate any possible SA_NODELAY flag into IRQ_NODELAY:
@@ -282,13 +446,25 @@ int setup_irq(unsigned int irq, struct i

  	new->irq = irq;
  	register_irq_proc(irq);
-	new->dir = new->threaded = NULL;
+	new->dir = NULL;
  	register_handler_proc(irq, new);

  	return 0;
  }

  /**
+ *      free_irqaction_rcu - free the actual memory block of a struct irqaction
+ *      @rcu: Pointer to the struct rcu_head
+ *
+ *      Is called after RCU synchronization
+ */
+static void free_irqaction_rcu(struct rcu_head *rcu)
+{
+	struct irqaction *p = container_of(rcu, struct irqaction, rcu);
+	kfree(p);
+}
+
+/**
   *	free_irq - free an interrupt
   *	@irq: Interrupt line to free
   *	@dev_id: Device identity to free
@@ -312,6 +488,7 @@ void free_irq(unsigned int irq, void *de
  		return;

  	desc = irq_desc + irq;
+
  	spin_lock_irqsave(&desc->lock, flags);
  	p = &desc->action;
  	for (;;) {
@@ -325,7 +502,7 @@ void free_irq(unsigned int irq, void *de
  				continue;

  			/* Found it - now remove it from the list of entries */
-			*pp = action->next;
+			rcu_assign_pointer(*pp, action->next);

  			/* Currently used only by UML, might disappear one day.*/
  #ifdef CONFIG_IRQ_RELEASE_METHOD
@@ -344,9 +521,8 @@ void free_irq(unsigned int irq, void *de
  			spin_unlock_irqrestore(&desc->lock, flags);
  			unregister_handler_proc(irq, action);

-			/* Make sure it's not being used on another CPU */
  			synchronize_irq(irq);
-			kfree(action);
+			call_rcu(&action->rcu, free_irqaction_rcu);
  			return;
  		}
  		printk(KERN_ERR "Trying to free free IRQ%d\n", irq);
@@ -358,12 +534,14 @@ void free_irq(unsigned int irq, void *de
  EXPORT_SYMBOL(free_irq);

  /**
- *	request_irq - allocate an interrupt line
+ *	request_irq2 - allocate an interrupt line
   *	@irq: Interrupt line to allocate
   *	@handler: Function to be called when the IRQ occurs
   *	@irqflags: Interrupt type flags
   *	@devname: An ascii name for the claiming device
   *	@dev_id: A cookie passed back to the handler function
+ *      @change_context: call back for requesting a change of context
+ *       (threaded vs hardirq)
   *
   *	This call allocates interrupt resources and enables the
   *	interrupt line and IRQ handling. From the point this
@@ -381,18 +559,23 @@ EXPORT_SYMBOL(free_irq);
   *
   *	Flags:
   *
- *	SA_SHIRQ		Interrupt is shared
+ *	SA_SHIRQ		Interrupt can be shared
   *	SA_INTERRUPT		Disable local interrupts while processing
   *	SA_SAMPLE_RANDOM	The interrupt can be used for entropy
+ *      SA_NODELAY              The handler must run in hardirq context
+ *      SA_MUST_THREAD          The handler can not run in hardirq context
   *
   */
-int request_irq(unsigned int irq,
-		irqreturn_t (*handler)(int, void *, struct pt_regs *),
-		unsigned long irqflags, const char * devname, void *dev_id)
+int request_irq2(unsigned int irq,
+		 irqreturn_t (*handler)(int, void *, struct pt_regs *),
+		 unsigned long irqflags, const char * devname, void *dev_id,
+		 int (*change_context)(int, void *,
+				       enum change_context_cmd))
  {
  	struct irqaction * action;
  	int retval;

+
  	/*
  	 * Sanity-check: shared interrupts must pass in a real dev-ID,
  	 * otherwise we'll have trouble later trying to figure out
@@ -416,8 +599,11 @@ int request_irq(unsigned int irq,
  	action->flags = irqflags;
  	cpus_clear(action->mask);
  	action->name = devname;
-	action->next = NULL;
  	action->dev_id = dev_id;
+#ifdef CONFIG_CHANGE_IRQ_CONTEXT
+	action->change_context = change_context;
+#endif
+	action->next = NULL;

  	select_smp_affinity(irq);

@@ -427,7 +613,17 @@ int request_irq(unsigned int irq,

  	return retval;
  }
+EXPORT_SYMBOL(request_irq2);

+int request_irq(unsigned int irq,
+		irqreturn_t (*handler)(int, void *, struct pt_regs *),
+		unsigned long irqflags, const char * devname, void *dev_id)
+{
+	if( !(irqflags & SA_NODELAY) )
+		irqflags |= SA_MUST_THREAD_RT;
+
+	return request_irq2(irq, handler, irqflags, devname, dev_id, NULL);
+}
  EXPORT_SYMBOL(request_irq);

  /**
Index: linux-2.6.16-rt23.spin_mutex/kernel/irq/proc.c
===================================================================
--- linux-2.6.16-rt23.spin_mutex.orig/kernel/irq/proc.c
+++ linux-2.6.16-rt23.spin_mutex/kernel/irq/proc.c
@@ -11,6 +11,7 @@
  #include <linux/profile.h>
  #include <linux/proc_fs.h>
  #include <linux/interrupt.h>
+#include <linux/ctype.h>

  #include "internals.h"

@@ -83,6 +84,138 @@ static int irq_affinity_write_proc(struc

  #endif

+
+#ifdef CONFIG_CHANGE_IRQ_CONTEXT
+/*
+ * The /proc/irq/<irq>/threaded values:
+ */
+static struct proc_dir_entry *irq_threaded_entry[NR_IRQS];
+
+static const char *sched_policies[] = {
+	"normal",
+	"fifo",
+	"rr",
+	"batch"
+};
+
+static int irq_threaded_read_proc(char *page, char **start, off_t off,
+				  int count, int *eof, void *data)
+{
+	unsigned int irq = (int)(long)data;
+	task_t *t;
+
+	if ( (irq_desc[irq].status & IRQ_NODELAY)
+	    || !irq_desc[irq].thread ) {
+		return scnprintf(page, count, "IRQ\n");
+	}
+
+	t = irq_desc[irq].thread;
+	if (t->policy == SCHED_NORMAL || t->policy == SCHED_BATCH)
+		return scnprintf(page, count, "%s %d\n",
+				 sched_policies[t->policy],
+				 task_prio(t));
+	else
+		return scnprintf(page, count, "%s %lu\n",
+				 sched_policies[t->policy],
+				 t->rt_priority);
+}
+
+
+static int set_rt_prio(int irq, int policy, char *arg)
+{
+	char *tmp;
+	struct sched_param prm;
+	int ret;
+	task_t *t;
+
+	while(isspace(*arg))
+		arg++;
+
+	if(*arg) {
+		prm.sched_priority = simple_strtol(arg,&tmp,10);
+		if (tmp == arg)
+			return -EINVAL;
+	}
+	else
+	  return -EINVAL;
+
+	ret = make_irq_threaded(irq, &irq_desc[irq]);
+	if(ret)
+		return ret;
+
+	t = irq_desc[irq].thread;
+	return sched_setscheduler(t, policy, &prm);
+}
+
+static int set_nonrt_prio(int irq, int policy, char *arg)
+{
+	char *tmp;
+	long prio;
+	int ret;
+	struct sched_param prm;
+	task_t *t;
+
+	while(isspace(*arg))
+		arg++;
+
+
+	if(*arg) {
+		prio = simple_strtol(arg,&tmp,10);
+		if (tmp == arg || prio < -20 || prio > 19)
+			return -EINVAL;
+	}
+	else
+		prio = 0;
+
+	ret = make_irq_threaded(irq, &irq_desc[irq]);
+	if(ret)
+		return ret;
+
+	t = irq_desc[irq].thread;
+	prm.sched_priority = 0;
+	ret = sched_setscheduler(t, policy, &prm);
+	if(ret)
+		return ret;
+
+	set_user_nice(t, prio);
+	return 0;
+}
+
+static int irq_threaded_write_proc(struct file *file,
+				   const char __user *buffer,
+				   unsigned long count, void *data)
+{
+	unsigned int irq = (int)(long)data;
+	int ret = 0;
+	char buf[20];
+
+	if(copy_from_user( (void*)&buf[0], (void*)buffer,
+			   count>20 ? 20: count ) )
+		return -EFAULT;
+
+	buf[19] = 0;
+
+	if(!strnicmp(buf,"irq",3))
+ 		ret = make_irq_nodelay(irq, &irq_desc[irq]);
+	else if(!strnicmp(buf,"normal",6))
+		ret = set_nonrt_prio(irq, SCHED_NORMAL, buf+6);
+	else if(!strnicmp(buf,"fifo",4))
+		ret = set_rt_prio(irq, SCHED_FIFO, buf+4);
+	else  if(!strnicmp(buf,"rr",2))
+		ret = set_rt_prio(irq, SCHED_RR, buf+2);
+	else  if(!strnicmp(buf,"batch",5))
+		ret = set_nonrt_prio(irq, SCHED_BATCH, buf+5);
+	else
+		ret = -EINVAL;
+
+
+	if(ret)
+		return ret;
+	else
+		return count;
+}
+#endif /* CONFIG_CHANGE_IRQ_CONTEXT */
+
  #define MAX_NAMELEN 10

  void register_irq_proc(unsigned int irq)
@@ -116,58 +249,33 @@ void register_irq_proc(unsigned int irq)
  		smp_affinity_entry[irq] = entry;
  	}
  #endif
+
+#ifdef CONFIG_CHANGE_IRQ_CONTEXT
+	{
+		struct proc_dir_entry *entry;
+
+		/* create /proc/irq/<irq>/threaded */
+		entry = create_proc_entry("threaded", 0600, irq_dir[irq]);
+
+		if (entry) {
+			entry->nlink = 1;
+			entry->data = (void *)(long)irq;
+			entry->read_proc = irq_threaded_read_proc;
+			entry->write_proc = irq_threaded_write_proc;
+		}
+		irq_threaded_entry[irq] = entry;
+	}
+#endif
  }

  #undef MAX_NAMELEN

  void unregister_handler_proc(unsigned int irq, struct irqaction *action)
  {
-	if (action->threaded)
-		remove_proc_entry(action->threaded->name, action->dir);
  	if (action->dir)
  		remove_proc_entry(action->dir->name, irq_dir[irq]);
  }

-#ifndef CONFIG_PREEMPT_RT
-
-#ifndef CONFIG_PREEMPT_RT
-
-static int threaded_read_proc(char *page, char **start, off_t off,
-			      int count, int *eof, void *data)
-{
-	return sprintf(page, "%c\n",
-		((struct irqaction *)data)->flags & SA_NODELAY ? '0' : '1');
-}
-
-static int threaded_write_proc(struct file *file, const char __user *buffer,
-			       unsigned long count, void *data)
-{
-	int c;
-	struct irqaction *action = data;
-	irq_desc_t *desc = irq_desc + action->irq;
-
-	if (get_user(c, buffer))
-		return -EFAULT;
-	if (c != '0' && c != '1')
-		return -EINVAL;
-
-	spin_lock_irq(&desc->lock);
-
-	if (c == '0')
-		action->flags |= SA_NODELAY;
-	if (c == '1')
-		action->flags &= ~SA_NODELAY;
-	recalculate_desc_flags(desc);
-
-	spin_unlock_irq(&desc->lock);
-
-	return 1;
-}
-
-#endif
-
-#endif
-
  #define MAX_NAMELEN 128

  static int name_unique(unsigned int irq, struct irqaction *new_action)
@@ -197,20 +305,6 @@ void register_handler_proc(unsigned int
  	action->dir = proc_mkdir(name, irq_dir[irq]);
  	if (!action->dir)
  		return;
-#ifndef CONFIG_PREEMPT_RT
-	{
-		struct proc_dir_entry *entry;
-		/* create /proc/irq/1234/handler/threaded */
-		entry = create_proc_entry("threaded", 0600, action->dir);
-		if (!entry)
-			return;
-		entry->nlink = 1;
-		entry->data = (void *)action;
-		entry->read_proc = threaded_read_proc;
-		entry->write_proc = threaded_write_proc;
-		action->threaded = entry;
-	}
-#endif
  }

  #undef MAX_NAMELEN

--

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [patch 3/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime.
       [not found] <20060602165336.147812000@localhost>
  2006-06-02 22:23 ` [patch 1/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime Esben Nielsen
  2006-06-02 22:23 ` [patch 2/5] " Esben Nielsen
@ 2006-06-02 22:23 ` Esben Nielsen
  2006-06-03 20:39   ` Steven Rostedt
  2006-06-02 22:23 ` [patch 4/5] " Esben Nielsen
  2006-06-02 22:23 ` [patch 5/5] " Esben Nielsen
  4 siblings, 1 reply; 11+ messages in thread
From: Esben Nielsen @ 2006-06-02 22:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar

Makes it possible for the e100 ethernet driver to have its interrupt handler
in both hard-irq and threaded context under PREEMPT_RT.

Index: linux-2.6.16-rt23.spin_mutex/drivers/net/e100.c
===================================================================
--- linux-2.6.16-rt23.spin_mutex.orig/drivers/net/e100.c
+++ linux-2.6.16-rt23.spin_mutex/drivers/net/e100.c
@@ -530,7 +530,7 @@ struct nic {
  	enum ru_state ru_running;

  	spinlock_t cb_lock			____cacheline_aligned;
-	spinlock_t cmd_lock;
+	spin_mutex_t cmd_lock;
  	struct csr __iomem *csr;
  	enum scb_cmd_lo cuc_cmd;
  	unsigned int cbs_avail;
@@ -1950,6 +1950,30 @@ static int e100_rx_alloc_list(struct nic
  	return 0;
  }

+static int e100_change_context(int irq, void *dev_id,
+			       enum change_context_cmd cmd)
+{
+	struct net_device *netdev = dev_id;
+	struct nic *nic = netdev_priv(netdev);
+
+	switch(cmd)
+	{
+	case IRQ_TO_HARDIRQ:
+		if(!spin_mutexes_can_spin())
+			return -ENOSYS;
+
+		spin_mutex_to_spin(&nic->cmd_lock);
+		break;
+	case IRQ_CAN_THREAD:
+		/* Ok - return 0 */
+		break;
+	case IRQ_TO_THREADED:
+		spin_mutex_to_mutex(&nic->cmd_lock);
+		break;
+	}
+	return 0; /* Ok */
+}
+
  static irqreturn_t e100_intr(int irq, void *dev_id, struct pt_regs *regs)
  {
  	struct net_device *netdev = dev_id;
@@ -2064,9 +2088,12 @@ static int e100_up(struct nic *nic)
  	e100_set_multicast_list(nic->netdev);
  	e100_start_receiver(nic, NULL);
  	mod_timer(&nic->watchdog, jiffies);
-	if((err = request_irq(nic->pdev->irq, e100_intr, SA_SHIRQ,
-		nic->netdev->name, nic->netdev)))
+	if((err = request_irq2(nic->pdev->irq, e100_intr,
+			       SA_SHIRQ|SA_MUST_THREAD_RT,
+			       nic->netdev->name, nic->netdev,
+			       e100_change_context)))
  		goto err_no_irq;
+
  	netif_wake_queue(nic->netdev);
  	netif_poll_enable(nic->netdev);
  	/* enable ints _after_ enabling poll, preventing a race between

--

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [patch 4/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime.
       [not found] <20060602165336.147812000@localhost>
                   ` (2 preceding siblings ...)
  2006-06-02 22:23 ` [patch 3/5] " Esben Nielsen
@ 2006-06-02 22:23 ` Esben Nielsen
  2006-06-03 21:30   ` Steven Rostedt
  2006-06-02 22:23 ` [patch 5/5] " Esben Nielsen
  4 siblings, 1 reply; 11+ messages in thread
From: Esben Nielsen @ 2006-06-02 22:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar

Makes it possible for the 8250 serial driver to have its interrupt handler
in both hard-irq and threaded context under PREEMPT_RT.

Index: linux-2.6.16-rt23.spin_mutex/drivers/serial/8250.c
===================================================================
--- linux-2.6.16-rt23.spin_mutex.orig/drivers/serial/8250.c
+++ linux-2.6.16-rt23.spin_mutex/drivers/serial/8250.c
@@ -140,7 +140,7 @@ struct uart_8250_port {
  };

  struct irq_info {
-	spinlock_t		lock;
+	spin_mutex_t		lock;
  	struct list_head	*head;
  };

@@ -1287,6 +1287,47 @@ serial8250_handle_port(struct uart_8250_
  	spin_unlock(&up->port.lock);
  }

+
+static int serial8250_change_context(int irq, void *dev_id,
+				     enum change_context_cmd cmd)
+{
+	struct irq_info *i = dev_id;
+	struct list_head *l;
+
+	switch(cmd) {
+	case IRQ_TO_HARDIRQ:
+		/* Spin mutexes not available: */
+		if(!spin_mutexes_can_spin())
+			return -ENOSYS;
+
+		/* First do the inner locks */
+		l = i->head;
+		do {
+			struct uart_8250_port *up;
+			up = list_entry(l, struct uart_8250_port, list);
+			spin_mutex_to_spin(&up->port.lock);
+			l = l->next;
+		} while(l != i->head && l != NULL);
+		spin_mutex_to_spin(&i->lock);
+		break;
+	case IRQ_CAN_THREAD:
+		break;
+	case IRQ_TO_THREADED:
+		spin_mutex_to_mutex(&i->lock);
+		l = i->head;
+
+		do {
+			struct uart_8250_port *up;
+			up = list_entry(l, struct uart_8250_port, list);
+			spin_mutex_to_mutex(&up->port.lock);
+			l = l->next;
+		} while(l != i->head && l != NULL);
+		break;
+	}
+
+	return 0;
+}
+
  /*
   * This is the serial driver's interrupt routine.
   *
@@ -1393,8 +1434,10 @@ static int serial_link_irq_chain(struct
  		i->head = &up->list;
  		spin_unlock_irq(&i->lock);

-		ret = request_irq(up->port.irq, serial8250_interrupt,
-				  irq_flags, "serial", i);
+		ret = request_irq2(up->port.irq, serial8250_interrupt,
+				   irq_flags | SA_MUST_THREAD_RT,
+				   "serial", i,
+				   serial8250_change_context);
  		if (ret < 0)
  			serial_do_unlink(i, up);
  	}
Index: linux-2.6.16-rt23.spin_mutex/include/linux/serial_core.h
===================================================================
--- linux-2.6.16-rt23.spin_mutex.orig/include/linux/serial_core.h
+++ linux-2.6.16-rt23.spin_mutex/include/linux/serial_core.h
@@ -206,7 +206,7 @@ struct uart_icount {
  typedef unsigned int __bitwise__ upf_t;

  struct uart_port {
-	spinlock_t		lock;			/* port lock */
+	spin_mutex_t		lock;			/* port lock */
  	unsigned int		iobase;			/* in/out[bwl] */
  	unsigned char __iomem	*membase;		/* read/write[bwl] */
  	unsigned int		irq;			/* irq number */

--

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [patch 5/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime.
       [not found] <20060602165336.147812000@localhost>
                   ` (3 preceding siblings ...)
  2006-06-02 22:23 ` [patch 4/5] " Esben Nielsen
@ 2006-06-02 22:23 ` Esben Nielsen
  4 siblings, 0 replies; 11+ messages in thread
From: Esben Nielsen @ 2006-06-02 22:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: Ingo Molnar

Make lpptest more compliant with the request_irq() interface. I don't think
the driver should set or remove flags on its own!

Index: linux-2.6.16-rt23.spin_mutex/drivers/char/lpptest.c
===================================================================
--- linux-2.6.16-rt23.spin_mutex.orig/drivers/char/lpptest.c
+++ linux-2.6.16-rt23.spin_mutex/drivers/char/lpptest.c
@@ -150,13 +150,13 @@ static int __init lpptest_init (void)
  		return -EAGAIN;
  	}

-	if (request_irq (LPPTEST_IRQ, lpptest_irq, 0, "lpptest", dev_id)) {
-		printk (KERN_WARNING "lpptest: irq %d in use. Unload parport module!\n", LPPTEST_IRQ);
+	if (request_irq (LPPTEST_IRQ, lpptest_irq, SA_NODELAY | SA_INTERRUPT,
+			 "lpptest", dev_id)) {
+		printk (KERN_WARNING "lpptest: irq %d in use. "
+			"Unload parport module!\n", LPPTEST_IRQ);
  		unregister_chrdev(LPPTEST_CHAR_MAJOR, LPPTEST_DEVICE_NAME);
  		return -EAGAIN;
  	}
-	irq_desc[LPPTEST_IRQ].status |= IRQ_NODELAY;
-	irq_desc[LPPTEST_IRQ].action->flags |= SA_NODELAY | SA_INTERRUPT;

  	INIT_PORT();
  	ENABLE_IRQ();

--

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 2/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime.
  2006-06-02 22:23 ` [patch 2/5] " Esben Nielsen
@ 2006-06-03 20:21   ` Steven Rostedt
  2006-06-04 17:33     ` Esben Nielsen
  0 siblings, 1 reply; 11+ messages in thread
From: Steven Rostedt @ 2006-06-03 20:21 UTC (permalink / raw)
  To: Esben Nielsen; +Cc: linux-kernel, Ingo Molnar

Disclaimer:  I haven't read all your patches yet.  I'm going one at a
time to comment, and then I will probably send more emails about the
overall response. So now, my comments are not on the big picture, but
simple atomic views.

On Fri, 2006-06-02 at 23:23 +0100, Esben Nielsen wrote:
> This patch makes it possible to change which context the interrupt handlers
> for each interrupt run in, hard-irq or threaded if CONFIG_PREEMPT_HARDIRQS is 
> set.
> 
> The interface is the file
>   /proc/irq/<irq number>/threaded
> You can read it to see what the context is now or write one of
> 
>   irq
>   fifo <rt priority>
>   rr <rt priority>
>   normal <nice value>
>   batch <nice value>
> 
> where one of the latter makes the interrupt handler threaded.
> 
> A replacement for request_irq(), called request_irq2(), is added. When a driver

request_irq2 ... yuck!  Perhaps request_irq_convertible() or something
similar?  But irq2, no way!

> uses this to install it's irq-handler it can also give a change_irq_context
> call-back. This call-back is called whenever the irq-context is changed or 
> going to be changed. The call-back must be of the form
> 
> int driver_change_context(int irq, void *dev_id, enum change_context_cmd cmd)

Eeee, looks like a big change to make on drivers, and something that can
keep -rt from mainline forever.  But I'll see more as I read.  This
looks optional but still it will make things more complex.

> 
> where
> 
> enum change_context_cmd {
>  	IRQ_TO_HARDIRQ,
>  	IRQ_CAN_THREAD,
>  	IRQ_TO_THREADED
> };
> 
> The call-back is supposed to do the following on
>   IRQ_TO_HARDIRQ: make sure everything in the interrupt handler is non-blocking
>                   or return a non-zero error code.
>   IRQ_CAN_THREAD: Return 0 if it is ok for the interrupt handler to be run in
>                   thread context. Return a non-zero error code otherwise.
>   IRQ_TO_THREAD: Now the interrupt handler does run in thread context. The
>                  driver can now change it's locks to being mutexes. The return
>                  value is ignored as the driver already got a chance to protest
>                  above.
> 
> Index: linux-2.6.16-rt23.spin_mutex/include/linux/interrupt.h
> ===================================================================
> --- linux-2.6.16-rt23.spin_mutex.orig/include/linux/interrupt.h
> +++ linux-2.6.16-rt23.spin_mutex/include/linux/interrupt.h
> @@ -34,21 +34,38 @@ typedef int irqreturn_t;
>   #define IRQ_HANDLED	(1)
>   #define IRQ_RETVAL(x)	((x) != 0)
> 
> +enum change_context_cmd {
> +	IRQ_TO_HARDIRQ,
> +	IRQ_CAN_THREAD,
> +	IRQ_TO_THREADED
> +};
> +
>   struct irqaction {
>   	irqreturn_t (*handler)(int, void *, struct pt_regs *);
>   	unsigned long flags;
>   	cpumask_t mask;
>   	const char *name;
>   	void *dev_id;
> +#ifdef CONFIG_CHANGE_IRQ_CONTEXT
> +	int (*change_context)(int, void *,
> +			      enum change_context_cmd);
> +#endif
>   	struct irqaction *next;
>   	int irq;
> -	struct proc_dir_entry *dir, *threaded;
> +	struct proc_dir_entry *dir;
> +	struct rcu_head rcu;
>   };
> 
>   extern irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs);
>   extern int request_irq(unsigned int,
>   		       irqreturn_t (*handler)(int, void *, struct pt_regs *),
>   		       unsigned long, const char *, void *);
> +extern int request_irq2(unsigned int irq,
> +			irqreturn_t (*handler)(int, void *, struct pt_regs *),
> +			unsigned long irqflags, const char * devname,
> +			void *dev_id,
> +			int (*change_context)(int, void *,
> +					      enum change_context_cmd));
>   extern void free_irq(unsigned int, void *);
> 
> 
> Index: linux-2.6.16-rt23.spin_mutex/include/linux/irq.h
> ===================================================================
> --- linux-2.6.16-rt23.spin_mutex.orig/include/linux/irq.h
> +++ linux-2.6.16-rt23.spin_mutex/include/linux/irq.h
> @@ -47,6 +47,18 @@
>   # define SA_NODELAY 0x01000000
>   #endif
> 
> +#ifndef SA_MUST_THREAD
> +# define SA_MUST_THREAD 0x02000000
> +#endif
> +
> +/* Set this flag if the irq handler must thread under preempt-rt otherwise not
> + */
> +#ifdef CONFIG_PREEMPT_RT
> +# define SA_MUST_THREAD_RT SA_MUST_THREAD
> +#else
> +# define SA_MUST_THREAD_RT 0
> +#endif
> +
>   /*
>    * IRQ types
>    */
> @@ -147,12 +159,13 @@ struct irq_type {
>    * @chip:		low level hardware access functions - comes from type
>    * @action:		the irq action chain
>    * @status:		status information
> + *                      (protected by the spinlock )
>    * @depth:		disable-depth, for nested irq_disable() calls
>    * @irq_count:		stats field to detect stalled irqs
>    * @irqs_unhandled:	stats field for spurious unhandled interrupts
>    * @thread:		Thread pointer for threaded preemptible irq handling
>    * @wait_for_handler:	Waitqueue to wait for a running preemptible handler
> - * @lock:		locking for SMP
> + * @lock:		lock around the action list
>    * @move_irq:		Flag need to re-target interrupt destination
>    *
>    * Pad this out to 32 bytes for cache and indexing reasons.
> Index: linux-2.6.16-rt23.spin_mutex/kernel/Kconfig.preempt
> ===================================================================
> --- linux-2.6.16-rt23.spin_mutex.orig/kernel/Kconfig.preempt
> +++ linux-2.6.16-rt23.spin_mutex/kernel/Kconfig.preempt
> @@ -119,6 +119,17 @@ config PREEMPT_HARDIRQS
> 
>   	  Say N if you are unsure.
> 
> +config CHANGE_IRQ_CONTEXT
> +	bool "Change the irq context runtime"
> +	depends on PREEMPT_HARDIRQS && (PREEMPT_RCU || !SPIN_MUTEXES)
> +	help
> +	  You can change wether the IRQ handler(s) for each IRQ number is
> +          running in hardirq context or as threaded by writing to
> +          /proc/irq/<number>/threaded
> +          If PREEMPT_RT is selected the drivers involved must be ready for it,
> +          though, or the write will fail. Remember to switch on SPIN_MUTEXES as
> +          well in that case as the drivers which are ready uses spin-mutexes.
> +
>   config SPINLOCK_BKL
>   	bool "Old-Style Big Kernel Lock"
>   	depends on (PREEMPT || SMP) && !PREEMPT_RT
> Index: linux-2.6.16-rt23.spin_mutex/kernel/irq/internals.h
> ===================================================================
> --- linux-2.6.16-rt23.spin_mutex.orig/kernel/irq/internals.h
> +++ linux-2.6.16-rt23.spin_mutex/kernel/irq/internals.h
> @@ -22,6 +22,10 @@ static inline void end_irq(irq_desc_t *d
>   }
> 
>   #ifdef CONFIG_PROC_FS
> +#ifdef CONFIG_CHANGE_IRQ_CONTEXT
> +extern int make_irq_nodelay(int irq, struct irq_desc *desc);
> +extern int make_irq_threaded(int irq, struct irq_desc *desc);
> +#endif
>   extern void register_irq_proc(unsigned int irq);
>   extern void register_handler_proc(unsigned int irq, struct irqaction *action);
>   extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
> Index: linux-2.6.16-rt23.spin_mutex/kernel/irq/manage.c
> ===================================================================
> --- linux-2.6.16-rt23.spin_mutex.orig/kernel/irq/manage.c
> +++ linux-2.6.16-rt23.spin_mutex/kernel/irq/manage.c
> @@ -206,6 +206,153 @@ int can_request_irq(unsigned int irq, un
>   	return !action;
>   }
> 
> +
> +#ifdef CONFIG_CHANGE_IRQ_CONTEXT
> +int make_action_nodelay(int irq, struct irqaction *act)
> +{
> +	unsigned long flags;
> +
> +	if(!(act->flags & SA_MUST_THREAD)) {
> +		return 0;
> +	}

Remove the brackets.

Also, let me get this straight.  If the action does not have
SA_MUST_THREAD, we return?  Or does it mean that if SA_MUST_THREAD is
not set, then SA_NODELAY must already be set? If that is the case, then
why are we not checking for SA_NODELAY here?

> +
> +	if( act->change_context ) {
> +		int ret =
> +			act->change_context(irq, act->dev_id, IRQ_TO_HARDIRQ);
> +		if(ret) {
> +			printk(KERN_ERR "Failed to change irq handler "
> +			       "for dev %s on IRQ%d to hardirq "
> +			       "context (ret: %d)\n", act->name, irq, ret);
> +			return ret;
> +		}
> +		spin_lock_irqsave(&irq_desc[irq].lock, flags);
> +		act->flags &=~SA_MUST_THREAD;

Both flags are zero here (see below about the WARN_ON)

> +		act->flags |= SA_NODELAY;
> +		spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
> +		return 0;
> +	}
> +	else {
> +		printk(KERN_ERR "Irq handler "
> +		       "for dev %s on IRQ%d can not be changed"
> +		       " to hardirq context\n", act->name, irq);
> +		return -ENOSYS;
> +	}
> +
> +}
> +
> +
> +static int __make_irq_threaded(int irq, struct irq_desc *desc);
> +
> +int make_irq_nodelay(int irq, struct irq_desc *desc)
> +{
> +	int ret = 0;
> +	struct irqaction *act;
> +	unsigned long flags;
> +
> +	rcu_read_lock();
> +	for(act = desc->action; act; act = act->next) {
> +		WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0);

This WARN_ON is not protected by the descriptor lock, so if this
function is run on two CPUs at the same time, then the act->flags can
have both of these zero by the above code.

> +		if(act->flags & SA_MUST_THREAD) {
> +			ret = make_action_nodelay(irq, act);
> +			if(ret) {
> +				printk(KERN_ERR "Could not make irq %d "
> +				       "nodelay (errno %d)\n",
> +				       irq, ret);

We printk on failure from above, and then printk again here?

> +				goto failed;
> +			}
> +		}
> +	}
> +	spin_lock_irqsave(&desc->lock, flags);
> +	desc->status |= IRQ_NODELAY;
> +	spin_unlock_irqrestore(&desc->lock, flags);
> +	rcu_read_unlock();
> +
> +	return 0;
> + failed:
> +	__make_irq_threaded(irq, desc);
> +	rcu_read_unlock();
> +	return ret;
> +}
> +
> +int action_may_thread(int irq, struct irqaction *act)
> +{
> +	if(!act->change_context)
> +		return !(act->flags & SA_NODELAY);
> +
> +	return act->change_context(irq, act->dev_id, IRQ_CAN_THREAD) == 0;

This IRQ_CAN_THREAD just looks out of place next to the other two.  It feels
very "hacky" to check if it can thread.  But what do I know?

> +}
> +
> +
> +static int __make_irq_threaded(int irq, struct irq_desc *desc)
> +{
> +	struct irqaction *act;
> +
> +	for(act = desc->action; act; act = act->next) {
> +		WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0);
> +		if(!action_may_thread(irq, act)) {
> +			return -EINVAL;
> +		}
> +	}
> +
> +	if (start_irq_thread(irq, desc))
> +		return -ENOMEM;

I know that currently start_irq_thread can only return -ENOMEM on
failure, but it may be more robust to capture the return code and return
that anyway.

> +
> +	spin_lock_irq(&desc->lock);
> +	desc->status &= ~IRQ_NODELAY;
> +	spin_unlock_irq(&desc->lock);
> +
> +	/* We can't make irq handlers change their context before we
> +	   are sure no CPU is running them in hard irq context */
> +	synchronize_irq(irq);
> +
> +	for(act = desc->action; act; act = act->next) {
> +		WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0);
> +		if(act->change_context) {
> +			/* This callback can't fail */

Will all device drivers know that the callback can't fail?

> +			act->change_context(irq, act->dev_id, IRQ_TO_THREADED);
> +			spin_lock_irq(&desc->lock);
> +			act->flags &=~SA_NODELAY;
> +			act->flags |= SA_MUST_THREAD;
> +			spin_unlock_irq(&desc->lock);
> +		}
> +	}
> +
> +	return 0;
> +}

[snipped the rest]

-- Steve



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 3/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime.
  2006-06-02 22:23 ` [patch 3/5] " Esben Nielsen
@ 2006-06-03 20:39   ` Steven Rostedt
  2006-06-04 17:34     ` Esben Nielsen
  0 siblings, 1 reply; 11+ messages in thread
From: Steven Rostedt @ 2006-06-03 20:39 UTC (permalink / raw)
  To: Esben Nielsen; +Cc: linux-kernel, Ingo Molnar

On Fri, 2006-06-02 at 23:23 +0100, Esben Nielsen wrote:
> Makes it possible for the e100 ethernet driver to have it's interrupt handler
> in both hard-irq and threaded context under PREEMPT_RT.
> 
> Index: linux-2.6.16-rt23.spin_mutex/drivers/net/e100.c
> ===================================================================
> --- linux-2.6.16-rt23.spin_mutex.orig/drivers/net/e100.c
> +++ linux-2.6.16-rt23.spin_mutex/drivers/net/e100.c
> @@ -530,7 +530,7 @@ struct nic {
>   	enum ru_state ru_running;
> 
>   	spinlock_t cb_lock			____cacheline_aligned;
> -	spinlock_t cmd_lock;
> +	spin_mutex_t cmd_lock;
>   	struct csr __iomem *csr;
>   	enum scb_cmd_lo cuc_cmd;
>   	unsigned int cbs_avail;
> @@ -1950,6 +1950,30 @@ static int e100_rx_alloc_list(struct nic
>   	return 0;
>   }
> 
> +static int e100_change_context(int irq, void *dev_id,
> +			       enum change_context_cmd cmd)
> +{
> +	struct net_device *netdev = dev_id;
> +	struct nic *nic = netdev_priv(netdev);
> +
> +	switch(cmd)
> +	{
> +	case IRQ_TO_HARDIRQ:
> +		if(!spin_mutexes_can_spin())
> +			return -ENOSYS;
> +
> +		spin_mutex_to_spin(&nic->cmd_lock);
> +		break;
> +	case IRQ_CAN_THREAD:
> +		/* Ok - return 0 */
> +		break;

Why even bother with the IRQ_CAN_THREAD.  If this would be anything
other than OK, then we shouldn't be using that request_irq2 (yuck!) call
in the first place.

-- Steve

> +	case IRQ_TO_THREADED:
> +		spin_mutex_to_mutex(&nic->cmd_lock);
> +		break;
> +	}
> +	return 0; /* Ok */
> +}
> +
>   static irqreturn_t e100_intr(int irq, void *dev_id, struct pt_regs *regs)
>   {
>   	struct net_device *netdev = dev_id;
> @@ -2064,9 +2088,12 @@ static int e100_up(struct nic *nic)
>   	e100_set_multicast_list(nic->netdev);
>   	e100_start_receiver(nic, NULL);
>   	mod_timer(&nic->watchdog, jiffies);
> -	if((err = request_irq(nic->pdev->irq, e100_intr, SA_SHIRQ,
> -		nic->netdev->name, nic->netdev)))
> +	if((err = request_irq2(nic->pdev->irq, e100_intr,
> +			       SA_SHIRQ|SA_MUST_THREAD_RT,
> +			       nic->netdev->name, nic->netdev,
> +			       e100_change_context)))
>   		goto err_no_irq;
> +
>   	netif_wake_queue(nic->netdev);
>   	netif_poll_enable(nic->netdev);
>   	/* enable ints _after_ enabling poll, preventing a race between
> 



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 4/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime.
  2006-06-02 22:23 ` [patch 4/5] " Esben Nielsen
@ 2006-06-03 21:30   ` Steven Rostedt
  2006-06-04 22:50     ` Esben Nielsen
  0 siblings, 1 reply; 11+ messages in thread
From: Steven Rostedt @ 2006-06-03 21:30 UTC (permalink / raw)
  To: Esben Nielsen; +Cc: linux-kernel, Ingo Molnar

On Fri, 2006-06-02 at 23:23 +0100, Esben Nielsen wrote:
> Makes it possible for the 8250 serial driver to have it's interrupt handler
> in both hard-irq and threaded context under PREEMPT_RT.
> 
> Index: linux-2.6.16-rt23.spin_mutex/drivers/serial/8250.c
> ===================================================================
> --- linux-2.6.16-rt23.spin_mutex.orig/drivers/serial/8250.c
> +++ linux-2.6.16-rt23.spin_mutex/drivers/serial/8250.c
> @@ -140,7 +140,7 @@ struct uart_8250_port {
>   };
> 
>   struct irq_info {
> -	spinlock_t		lock;
> +	spin_mutex_t		lock;
>   	struct list_head	*head;
>   };
> 
> @@ -1287,6 +1287,47 @@ serial8250_handle_port(struct uart_8250_
>   	spin_unlock(&up->port.lock);
>   }
> 
> +
> +static int serial8250_change_context(int irq, void *dev_id,
> +				     enum change_context_cmd cmd)
> +{
> +	struct irq_info *i = dev_id;
> +	struct list_head *l;
> +
> +	switch(cmd) {
> +	case IRQ_TO_HARDIRQ:
> +		/* Spin mutexes not available: */
> +		if(!spin_mutexes_can_spin())
> +			return -ENOSYS;
> +
> +		/* First do the inner locks */
> +		l = i->head;
> +		do {
> +			struct uart_8250_port *up;
> +			up = list_entry(l, struct uart_8250_port, list);
> +			spin_mutex_to_spin(&up->port.lock);
> +			l = l->next;
> +		} while(l != i->head && l != NULL);

Now this shows the flaw in your whole design.  Don't get me wrong, I'm
very impressed that you got this working for you.  But for this to be
accepted, it can't be too intrusive.  It is also very error prone since
you must know not only the locks in the handlers, but also the locks in
all the functions that you call. And each device must update them.

Two things:

1) What if you have two serials, and one is threaded and one is not.  It
might mutex a lock that should be a spin.

2) You forgot to update sysrq_key_table_lock.  That can be called from
the serial interrupt as well.  So even just working with two devices
it's not trivial to get things right.  Imagine what it would take for
the whole kernel.  Not to mention the maintenance nightmare it would
cause.

I do give you an A+ for effort, but I'm not sure if this is the
solution.  Frankly, it scares me, unless you can find a sure fire way to
update all the locks properly, that would not cause problems elsewhere.

Maybe the -rt way can introduce a new paradigm.  Maybe we shouldn't have
top halves at all, but instead a way to register threads to devices.
Think about it, the big picture, what are interrupts for?  They are to
tell us that an event happened, and if there is nothing waiting for that
event, then it was worthless.  So really, what we could do (and this
would perhaps also be good for mainline, although it would be very hard
to get accepted) is to have registering functions for each device.

So when a thread waits on a device, it is registered with the irq that
the device has, instead of a wait queue.  So each interrupt would have a
priority sorted list of threads that are waiting for that interrupt.
When the interrupt happens, the highest priority thread would wake up to
handle it, instead of an interrupt handler. If the interrupt belonged to
another thread, the one that was woken up would pass it off to the
thread waiting and go back to sleep.

This also allows the interrupt handlers to schedule and even write to
user space, once it finds out that the thread working on the interrupt
is the thread in question.

Let's say that we have a network card, which are commonly shared with
other devices, and it also has to do a lot before it knows what process
it is working for.  And let's just say it is shared (uncommonly) with
an IDE device.

Now an interrupt comes in on this line.  There's a list of all the
highest priority tasks waiting on each device (not all tasks, just the
highest for each device, like the pi_list in the rt code).  So the
highest priority tasks for each device is woken up (the highest for the
network card and the highest for the IDE) and the interrupt handler
would return (not unlike the irq threads of the current -rt, except, we
wouldn't have irq threads but tasks that are waiting on them).  These
tasks would be in the registering function of the device, which would
know how to handle the interrupt.  Now if the current process is higher
in priority, then it won't be preempted by this interrupt. But if a
higher priority process is waiting, then the current process is
preempted.  So this is like interrupt handlers inheriting priorities of
processes waiting.

So the process now checks to see if the interrupt belongs to it, and if
it does, then simply handle the case.  Otherwise, if it does not belong
to the device, simply go back to sleep and wait again.  If this belongs
to the device, but for another waiter, using our example, the network
card, the process realized that it belonged to another process on
another port, it could wake up that process and pass information on how
much work the first process has done, so that the other process doesn't
repeat the work.

I'm just making this up as I go, but it really looks like this can be
examined more in an academic arena.  Perhaps it would even lead to fewer
context switches, since the process waiting would already be woken up on
interrupt.  The above idea is based on what interrupts are for.  They
don't happen for no reasons, but there's almost always someone waiting
for it.

-- Steve

> +		spin_mutex_to_spin(&i->lock);
> +		break;
> +	case IRQ_CAN_THREAD:
> +		break;
> +	case IRQ_TO_THREADED:
> +		spin_mutex_to_mutex(&i->lock);
> +		l = i->head;
> +
> +		do {
> +			struct uart_8250_port *up;
> +			up = list_entry(l, struct uart_8250_port, list);
> +			spin_mutex_to_mutex(&up->port.lock);
> +			l = l->next;
> +		} while(l != i->head && l != NULL);
> +		break;
> +	}
> +
> +	return 0;
> +}
> +
>   /*
>    * This is the serial driver's interrupt routine.
>    *
> @@ -1393,8 +1434,10 @@ static int serial_link_irq_chain(struct
>   		i->head = &up->list;
>   		spin_unlock_irq(&i->lock);
> 
> -		ret = request_irq(up->port.irq, serial8250_interrupt,
> -				  irq_flags, "serial", i);
> +		ret = request_irq2(up->port.irq, serial8250_interrupt,
> +				   irq_flags | SA_MUST_THREAD_RT,
> +				   "serial", i,
> +				   serial8250_change_context);
>   		if (ret < 0)
>   			serial_do_unlink(i, up);
>   	}
> Index: linux-2.6.16-rt23.spin_mutex/include/linux/serial_core.h
> ===================================================================
> --- linux-2.6.16-rt23.spin_mutex.orig/include/linux/serial_core.h
> +++ linux-2.6.16-rt23.spin_mutex/include/linux/serial_core.h
> @@ -206,7 +206,7 @@ struct uart_icount {
>   typedef unsigned int __bitwise__ upf_t;
> 
>   struct uart_port {
> -	spinlock_t		lock;			/* port lock */
> +	spin_mutex_t		lock;			/* port lock */
>   	unsigned int		iobase;			/* in/out[bwl] */
>   	unsigned char __iomem	*membase;		/* read/write[bwl] */
>   	unsigned int		irq;			/* irq number */
> 
> --
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
-- 
Steven Rostedt
Senior Programmer
Kihon Technologies
(607)786-4830


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 2/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime.
  2006-06-03 20:21   ` Steven Rostedt
@ 2006-06-04 17:33     ` Esben Nielsen
  0 siblings, 0 replies; 11+ messages in thread
From: Esben Nielsen @ 2006-06-04 17:33 UTC (permalink / raw)
  To: rostedt; +Cc: Esben Nielsen, linux-kernel, Ingo Molnar

On Sat, 3 Jun 2006, Steven Rostedt wrote:

> Disclaimer:  I haven't read all your patches yet.  I'm going one at a
> time to comment, and then I will probably send more emails about the
> overall response. So now, my comments are not on the big picture, but
> simple atomic views.
>
> On Fri, 2006-06-02 at 23:23 +0100, Esben Nielsen wrote:
>> This patch makes it possible to change which context the interrupt handlers
>> for each interrupt run in, hard-irq or threaded if CONFIG_PREEMPT_HARDIRQS is
>> set.
>>
>> The interface is the file
>>   /proc/irq/<irq number>/threaded
>> You can read it to see what the context is now or write one of
>>
>>   irq
>>   fifo <rt priority>
>>   rr <rt priority>
>>   normal <nice value>
>>   batch <nice value>
>>
>> where one of the latter makes the interrupt handler threaded.
>>
>> A replacement for request_irq(), called request_irq2(), is added. When a driver
>
> request_irq2 ... yuck!  Perhaps request_irq_convertible() or something
> similar?  But irq2, no way!

I know...
The problem is:
1) Don't change existing drivers.
2) The change_irq_context() call-back must be set at request_irq(), so it
is not enough to just make a new function where the driver can set its
callback after the request_irq().

request_irq_convertible() might be an ok name.


>
>> uses this to install it's irq-handler it can also give a change_irq_context
>> call-back. This call-back is called whenever the irq-context is changed or
>> going to be changed. The call-back must be of the form
>>
>> int driver_change_context(int irq, void *dev_id, enum change_context_cmd cmd)
>
> Eeee, looks like a big change to make on drivers, and something that can
> keep -rt from mainline forever.  But I'll see more as I read.  This
> looks optional but still it will make things more complex.

It is only optional, but the function is very easy to make.

>
>>
>> where
>>
>> enum change_context_cmd {
>>  	IRQ_TO_HARDIRQ,
>>  	IRQ_CAN_THREAD,
>>  	IRQ_TO_THREADED
>> };
>>
>> The call-back is supposed to do the following on
>>   IRQ_TO_HARDIRQ: make sure everything in the interrupt handler is non-blocking
>>                   or return a non-zero error code.
>>   IRQ_CAN_THREAD: Return 0 if it is ok for the interrupt handler to be run in
>>                   thread context. Return a non-zero error code otherwise.
>>   IRQ_TO_THREADED: Now the interrupt handler does run in thread context. The
>>                  driver can now change it's locks to being mutexes. The return
>>                  value is ignored as the driver already got a chance to protest
>>                  above.
>>
>> Index: linux-2.6.16-rt23.spin_mutex/include/linux/interrupt.h
>> ===================================================================
>> --- linux-2.6.16-rt23.spin_mutex.orig/include/linux/interrupt.h
>> +++ linux-2.6.16-rt23.spin_mutex/include/linux/interrupt.h
>> @@ -34,21 +34,38 @@ typedef int irqreturn_t;
>>   #define IRQ_HANDLED	(1)
>>   #define IRQ_RETVAL(x)	((x) != 0)
>>
>> +enum change_context_cmd {
>> +	IRQ_TO_HARDIRQ,
>> +	IRQ_CAN_THREAD,
>> +	IRQ_TO_THREADED
>> +};
>> +
>>   struct irqaction {
>>   	irqreturn_t (*handler)(int, void *, struct pt_regs *);
>>   	unsigned long flags;
>>   	cpumask_t mask;
>>   	const char *name;
>>   	void *dev_id;
>> +#ifdef CONFIG_CHANGE_IRQ_CONTEXT
>> +	int (*change_context)(int, void *,
>> +			      enum change_context_cmd);
>> +#endif
>>   	struct irqaction *next;
>>   	int irq;
>> -	struct proc_dir_entry *dir, *threaded;
>> +	struct proc_dir_entry *dir;
>> +	struct rcu_head rcu;
>>   };
>>
>>   extern irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs);
>>   extern int request_irq(unsigned int,
>>   		       irqreturn_t (*handler)(int, void *, struct pt_regs *),
>>   		       unsigned long, const char *, void *);
>> +extern int request_irq2(unsigned int irq,
>> +			irqreturn_t (*handler)(int, void *, struct pt_regs *),
>> +			unsigned long irqflags, const char * devname,
>> +			void *dev_id,
>> +			int (*change_context)(int, void *,
>> +					      enum change_context_cmd));
>>   extern void free_irq(unsigned int, void *);
>>
>>
>> Index: linux-2.6.16-rt23.spin_mutex/include/linux/irq.h
>> ===================================================================
>> --- linux-2.6.16-rt23.spin_mutex.orig/include/linux/irq.h
>> +++ linux-2.6.16-rt23.spin_mutex/include/linux/irq.h
>> @@ -47,6 +47,18 @@
>>   # define SA_NODELAY 0x01000000
>>   #endif
>>
>> +#ifndef SA_MUST_THREAD
>> +# define SA_MUST_THREAD 0x02000000
>> +#endif
>> +
>> +/* Set this flag if the irq handler must thread under preempt-rt otherwise not
>> + */
>> +#ifdef CONFIG_PREEMPT_RT
>> +# define SA_MUST_THREAD_RT SA_MUST_THREAD
>> +#else
>> +# define SA_MUST_THREAD_RT 0
>> +#endif
>> +
>>   /*
>>    * IRQ types
>>    */
>> @@ -147,12 +159,13 @@ struct irq_type {
>>    * @chip:		low level hardware access functions - comes from type
>>    * @action:		the irq action chain
>>    * @status:		status information
>> + *                      (protected by the spinlock )
>>    * @depth:		disable-depth, for nested irq_disable() calls
>>    * @irq_count:		stats field to detect stalled irqs
>>    * @irqs_unhandled:	stats field for spurious unhandled interrupts
>>    * @thread:		Thread pointer for threaded preemptible irq handling
>>    * @wait_for_handler:	Waitqueue to wait for a running preemptible handler
>> - * @lock:		locking for SMP
>> + * @lock:		lock around the action list
>>    * @move_irq:		Flag need to re-target interrupt destination
>>    *
>>    * Pad this out to 32 bytes for cache and indexing reasons.
>> Index: linux-2.6.16-rt23.spin_mutex/kernel/Kconfig.preempt
>> ===================================================================
>> --- linux-2.6.16-rt23.spin_mutex.orig/kernel/Kconfig.preempt
>> +++ linux-2.6.16-rt23.spin_mutex/kernel/Kconfig.preempt
>> @@ -119,6 +119,17 @@ config PREEMPT_HARDIRQS
>>
>>   	  Say N if you are unsure.
>>
>> +config CHANGE_IRQ_CONTEXT
>> +	bool "Change the irq context runtime"
>> +	depends on PREEMPT_HARDIRQS && (PREEMPT_RCU || !SPIN_MUTEXES)
>> +	help
>> +	  You can change whether the IRQ handler(s) for each IRQ number is
>> +          running in hardirq context or as threaded by writing to
>> +          /proc/irq/<number>/threaded
>> +          If PREEMPT_RT is selected the drivers involved must be ready for it,
>> +          though, or the write will fail. Remember to switch on SPIN_MUTEXES as
>> +          well in that case as the drivers which are ready uses spin-mutexes.
>> +
>>   config SPINLOCK_BKL
>>   	bool "Old-Style Big Kernel Lock"
>>   	depends on (PREEMPT || SMP) && !PREEMPT_RT
>> Index: linux-2.6.16-rt23.spin_mutex/kernel/irq/internals.h
>> ===================================================================
>> --- linux-2.6.16-rt23.spin_mutex.orig/kernel/irq/internals.h
>> +++ linux-2.6.16-rt23.spin_mutex/kernel/irq/internals.h
>> @@ -22,6 +22,10 @@ static inline void end_irq(irq_desc_t *d
>>   }
>>
>>   #ifdef CONFIG_PROC_FS
>> +#ifdef CONFIG_CHANGE_IRQ_CONTEXT
>> +extern int make_irq_nodelay(int irq, struct irq_desc *desc);
>> +extern int make_irq_threaded(int irq, struct irq_desc *desc);
>> +#endif
>>   extern void register_irq_proc(unsigned int irq);
>>   extern void register_handler_proc(unsigned int irq, struct irqaction *action);
>>   extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
>> Index: linux-2.6.16-rt23.spin_mutex/kernel/irq/manage.c
>> ===================================================================
>> --- linux-2.6.16-rt23.spin_mutex.orig/kernel/irq/manage.c
>> +++ linux-2.6.16-rt23.spin_mutex/kernel/irq/manage.c
>> @@ -206,6 +206,153 @@ int can_request_irq(unsigned int irq, un
>>   	return !action;
>>   }
>>
>> +
>> +#ifdef CONFIG_CHANGE_IRQ_CONTEXT
>> +int make_action_nodelay(int irq, struct irqaction *act)
>> +{
>> +	unsigned long flags;
>> +
>> +	if(!(act->flags & SA_MUST_THREAD)) {
>> +		return 0;
>> +	}
>
> Remove the brackets.
>
> Also, let me get this straight.  If the action does not have
> SA_MUST_THREAD, we return?  Or does it mean that if SA_MUST_THREAD is
> not set, then SA_NODELAY must already be set? If that is the case, then
> why are we not checking for SA_NODELAY here?

If SA_MUST_THREAD is not set, there is no blocking inside the action 
handler and therefore it is ok to do this in hardirq aka "nodelay".
Notice SA_MUST_THREAD is set on all handlers without SA_NODELAY under
PREEMPT_RT.

>
>> +
>> +	if( act->change_context ) {
>> +		int ret =
>> +			act->change_context(irq, act->dev_id, IRQ_TO_HARDIRQ);
>> +		if(ret) {
>> +			printk(KERN_ERR "Failed to change irq handler "
>> +			       "for dev %s on IRQ%d to hardirq "
>> +			       "context (ret: %d)\n", act->name, irq, ret);
>> +			return ret;
>> +		}
>> +		spin_lock_irqsave(&irq_desc[irq].lock, flags);
>> +		act->flags &=~SA_MUST_THREAD;
>
> Both flags are zero here (see below about the WARN_ON)
>
The WARN_ON is a warning on both flags set (or supposed to be...:-)
It doesn't make sense to have SA_MUST_THREAD and SA_NODELAY both set.

>> +		act->flags |= SA_NODELAY;
>> +		spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
>> +		return 0;
>> +	}
>> +	else {
>> +		printk(KERN_ERR "Irq handler "
>> +		       "for dev %s on IRQ%d can not be changed"
>> +		       " to hardirq context\n", act->name, irq);
>> +		return -ENOSYS;
>> +	}
>> +
>> +}
>> +
>> +
>> +static int __make_irq_threaded(int irq, struct irq_desc *desc);
>> +
>> +int make_irq_nodelay(int irq, struct irq_desc *desc)
>> +{
>> +	int ret = 0;
>> +	struct irqaction *act;
>> +	unsigned long flags;
>> +
>> +	rcu_read_lock();
>> +	for(act = desc->action; act; act = act->next) {
>> +		WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0);
>
> This WARN_ON is not protected by the descriptor lock, so if this
> function is run on two CPUs at the same time, then the act->flags can
> have both of these zero by the above code.

Both be set :-) But bug noticed.

>
>> +		if(act->flags & SA_MUST_THREAD) {
>> +			ret = make_action_nodelay(irq, act);
>> +			if(ret) {
>> +				printk(KERN_ERR "Could not make irq %d "
>> +				       "nodelay (errno %d)\n",
>> +				       irq, ret);
>
> We printk on failure from above, and then printk again here?

Well might be too verbose.

>
>> +				goto failed;
>> +			}
>> +		}
>> +	}
>> +	spin_lock_irqsave(&desc->lock, flags);
>> +	desc->status |= IRQ_NODELAY;
>> +	spin_unlock_irqrestore(&desc->lock, flags);
>> +	rcu_read_unlock();
>> +
>> +	return 0;
>> + failed:
>> +	__make_irq_threaded(irq, desc);
>> +	rcu_read_unlock();
>> +	return ret;
>> +}
>> +
>> +int action_may_thread(int irq, struct irqaction *act)
>> +{
>> +	if(!act->change_context)
>> +		return !(act->flags & SA_NODELAY);
>> +
>> +	return act->change_context(irq, act->dev_id, IRQ_CAN_THREAD) == 0;
>
> This IRQ_CAN_THREAD just looks out of place of the other two.  It feels
> very "hacky" to check if it can thread.  But what do I know?

The problem is that the step to make a handler be threaded has to be split 
in two: First ask, then remove IRQ_NODELAY, then change the locks to 
mutexes. So the driver needs to be involved twice. I could skip this, but I
put it in for generality.

>
>> +}
>> +
>> +
>> +static int __make_irq_threaded(int irq, struct irq_desc *desc)
>> +{
>> +	struct irqaction *act;
>> +
>> +	for(act = desc->action; act; act = act->next) {
>> +		WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0);
>> +		if(!action_may_thread(irq, act)) {
>> +			return -EINVAL;
>> +		}
>> +	}
>> +
>> +	if (start_irq_thread(irq, desc))
>> +		return -ENOMEM;
>
> I know that currently start_irq_thread can only return -ENOMEM on
> failure, but it may be more robust to capture the return code and return
> that anyway.

*nod*
This was copied from the original code, so the same thing is elsewhere,
too.

>
>> +
>> +	spin_lock_irq(&desc->lock);
>> +	desc->status &= ~IRQ_NODELAY;
>> +	spin_unlock_irq(&desc->lock);
>> +
>> +	/* We can't make irq handlers change their context before we
>> +	   are sure no CPU is running them in hard irq context */
>> +	synchronize_irq(irq);
>> +
>> +	for(act = desc->action; act; act = act->next) {
>> +		WARN_ON(((~act->flags) & (SA_MUST_THREAD|SA_NODELAY)) == 0);
>> +		if(act->change_context) {
>> +			/* This callback can't fail */
>
> Will all device drivers know that the callback can't fail?

Well, it is in the documentation.

>
>> +			act->change_context(irq, act->dev_id, IRQ_TO_THREADED);
>> +			spin_lock_irq(&desc->lock);
>> +			act->flags &=~SA_NODELAY;
>> +			act->flags |= SA_MUST_THREAD;
>> +			spin_unlock_irq(&desc->lock);
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>
> [snipped the rest]
>
> -- Steve
>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 3/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime.
  2006-06-03 20:39   ` Steven Rostedt
@ 2006-06-04 17:34     ` Esben Nielsen
  0 siblings, 0 replies; 11+ messages in thread
From: Esben Nielsen @ 2006-06-04 17:34 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Esben Nielsen, linux-kernel, Ingo Molnar



On Sat, 3 Jun 2006, Steven Rostedt wrote:

> On Fri, 2006-06-02 at 23:23 +0100, Esben Nielsen wrote:
>> Makes it possible for the e100 ethernet driver to have it's interrupt handler
>> in both hard-irq and threaded context under PREEMPT_RT.
>>
>> Index: linux-2.6.16-rt23.spin_mutex/drivers/net/e100.c
>> ===================================================================
>> --- linux-2.6.16-rt23.spin_mutex.orig/drivers/net/e100.c
>> +++ linux-2.6.16-rt23.spin_mutex/drivers/net/e100.c
>> @@ -530,7 +530,7 @@ struct nic {
>>   	enum ru_state ru_running;
>>
>>   	spinlock_t cb_lock			____cacheline_aligned;
>> -	spinlock_t cmd_lock;
>> +	spin_mutex_t cmd_lock;
>>   	struct csr __iomem *csr;
>>   	enum scb_cmd_lo cuc_cmd;
>>   	unsigned int cbs_avail;
>> @@ -1950,6 +1950,30 @@ static int e100_rx_alloc_list(struct nic
>>   	return 0;
>>   }
>>
>> +static int e100_change_context(int irq, void *dev_id,
>> +			       enum change_context_cmd cmd)
>> +{
>> +	struct net_device *netdev = dev_id;
>> +	struct nic *nic = netdev_priv(netdev);
>> +
>> +	switch(cmd)
>> +	{
>> +	case IRQ_TO_HARDIRQ:
>> +		if(!spin_mutexes_can_spin())
>> +			return -ENOSYS;
>> +
>> +		spin_mutex_to_spin(&nic->cmd_lock);
>> +		break;
>> +	case IRQ_CAN_THREAD:
>> +		/* Ok - return 0 */
>> +		break;
>
> Why even bother with the IRQ_CAN_THREAD.  If this would be anything
> other than OK, then we shouldn't be using that request_irq2 (yuck!) call
> in the first place.
>
Just for the sake of generality. Nothing else. It would be very unlikely, 
as you say, that it wouldn't return 0.

> -- Steve

Esben


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [patch 4/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime.
  2006-06-03 21:30   ` Steven Rostedt
@ 2006-06-04 22:50     ` Esben Nielsen
  0 siblings, 0 replies; 11+ messages in thread
From: Esben Nielsen @ 2006-06-04 22:50 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Esben Nielsen, linux-kernel, Ingo Molnar

On Sat, 3 Jun 2006, Steven Rostedt wrote:

> On Fri, 2006-06-02 at 23:23 +0100, Esben Nielsen wrote:
>> Makes it possible for the 8250 serial driver to have it's interrupt handler
>> in both hard-irq and threaded context under PREEMPT_RT.
>>
>> Index: linux-2.6.16-rt23.spin_mutex/drivers/serial/8250.c
>> ===================================================================
>> --- linux-2.6.16-rt23.spin_mutex.orig/drivers/serial/8250.c
>> +++ linux-2.6.16-rt23.spin_mutex/drivers/serial/8250.c
>> @@ -140,7 +140,7 @@ struct uart_8250_port {
>>   };
>>
>>   struct irq_info {
>> -	spinlock_t		lock;
>> +	spin_mutex_t		lock;
>>   	struct list_head	*head;
>>   };
>>
>> @@ -1287,6 +1287,47 @@ serial8250_handle_port(struct uart_8250_
>>   	spin_unlock(&up->port.lock);
>>   }
>>
>> +
>> +static int serial8250_change_context(int irq, void *dev_id,
>> +				     enum change_context_cmd cmd)
>> +{
>> +	struct irq_info *i = dev_id;
>> +	struct list_head *l;
>> +
>> +	switch(cmd) {
>> +	case IRQ_TO_HARDIRQ:
>> +		/* Spin mutexes not available: */
>> +		if(!spin_mutexes_can_spin())
>> +			return -ENOSYS;
>> +
>> +		/* First do the inner locks */
>> +		l = i->head;
>> +		do {
>> +			struct uart_8250_port *up;
>> +			up = list_entry(l, struct uart_8250_port, list);
>> +			spin_mutex_to_spin(&up->port.lock);
>> +			l = l->next;
>> +		} while(l != i->head && l != NULL);
>
> Now this shows the flaw in your whole design.  Don't get me wrong, I'm
> very impressed that you got this working for you.  But for this to be
> accepted, it can't be too intrusive.  It is also very error prone since
> you must know not only the locks in the handlers, but also the locks in
> all the functions that you call. And each device must update them.
>
> Two things:
>
> 1) What if you have two serials, and one is threaded and one is not.  It
> might mutex a lock that should be a spin.
>
> 2) You forgot to update sysrq_key_table_lock.  That can be called from
> the serial interrupt as well.  So even just working with two devices
> it's not trivial to get things right.  Imagine what it would take for
> the whole kernel.  Not to mention the maintenance nightmare it would
> cause.

I imagine the change would be done by those people using the drivers in a 
RT context. If they want to change a handler to run in hard-IRQ context 
they will have to identify all the locks anyway. Then can just as well do 
it this way and then submit it to the main-line kernel. If they just 
change them to raw_spin_lock_t it can't be submitted.

I considered making a system where the right lock form could be chosen at
compile time. Then the drivers would have a suboption "threaded irq" or
"hard irq" and from that choice the right lock types were chosen.
But then the problem about shared interrupts came up on the list. Then the
ability to change the context at runtime would be useful.


>
> I do give you an A+ for effort, but I'm not sure if this is the
> solution.  Frankly, it scares me, unless you can find a sure fire way to
> update all the locks properly, that would not cause problems elsewhere.

Well, you will very soon hit a BUG: scheduling while in atomic if you miss 
some. Of course, I did miss some, and I didn't get that because I didn't
use that subsystem in my tests.

>
> Maybe the -rt way can introduce a new paradigm.  Maybe we shouldn't have
> top halves at all, but instead a way to register threads to devices.
> Think about it, the big picture, what are interrupts for?  They are to
> tell us that an event happened, and if there is nothing waiting for that
> event, then it was worthless.  So really, what we could do (and this
> would perhaps also be good for mainline, although it would be very hard
> to get accepted) is to have registering functions for each device.
>
> So when a thread waits on a device, it is registered with the irq that
> the device has, instead of a wait queue.  So each interrupt would have a
> priority sorted list of threads that are waiting for that interrupt.
> When the interrupt happens, the highest priority thread would wake up to
> handle it, instead of an interrupt handler. If the interrupt belonged to
> another thread, the one that was woken up would pass it off to the
> thread waiting and go back to sleep.
>
> This also allows the interrupt handlers to schedule and even write to
> user space, once it finds out that the thread working on the interrupt
> is the thread in question.
>
> Let's say that we have a network card, which are commonly shared with
> other devices, and it also has to do a lot before it knows what process
> it is working on for.  And lets just say it is shared (uncommonly) with
> a IDE device.
>
> Now an interrupt comes in on this line.  There's a list of all the
> highest priority tasks waiting on each device (not all tasks, just the
> highest for each device, like the pi_list in the rt code).  So the
> highest priority tasks for each device is woken up (the highest for the
> network card and the highest for the IDE) and the interrupt handler
> would return (not unlike the irq threads of the current -rt, except, we
> wouldn't have irq threads but tasks that are waiting on them).  These
> tasks would be in the registering function of the device, which would
> know how to handle the interrupt.  Now if the current process is higher
> in priority, then it wont be preempted by this interrupt. But if a
> higher priority process is waiting, then the current process is
> preempted.  So this is like interrupt handlers inheriting priorities of
> processes waiting.
>
> So the process now checks to see if the interrupt belongs to it, and if
> it does, then simply handle the case.  Otherwise, if it does not belong
> to the device, simply go back to sleep and wait again.  If this belongs
> to the device, but for another waiter, using our example, the network
> card, the process realized that it belonged to another process on
> another port, it could wake up that process and pass information on how
> much work the first process has done, so that the other process doesn't
> repeat the work.
>
> I'm just making this up as I go, but it really looks like this can be
> examined more in an academic arena.  Perhaps it would even lead to less
> context switches, since the process waiting would already be woken up on
> interrupt.  The above idea is based on what interrupts are for.  They
> don't happen for no reasons, but there's almost always someone waiting
> for it.
>

I must say I am not sure how this idea _really_ differs from the present 
setup, but then again, I don't really understand where you are going. The 
only major difference is that every "interrupt handler" runs in its own
thread even though they share the same interrupt line. That again leads to 
the usual problem: When should the interrupt be reenabled?

As long as interrupts can be shared, all handlers of the same interrupt
have to be handled before it can be re-enabled. Sure, you can sort it
out in a way such that the high priority handler runs first, but the worst
case latency is still set by the lower priority handler, because the
system simply can't re-enable the interrupt after a handler has handled it.
So if the high priority event happens just after the low priority one, but 
after the high priority handler has run, the system has to wait for the 
low priority handler to handle its work, re-enable the interrupt, see the
high priority event and then call that handler.

I can't see how splitting it up in threads, no matter how it is done, is
going to solve this problem.  Actually, having them in one thread makes more sense,
because when the latency will depend on the lowest priority anyway they 
all should have the same priority.


> -- Steve

If you really want to change stuff I have had another idea:

Make a pool of non-running threads. In the low level assembly code 
handling interrupts, take one out of the pool and move the stack-pointer 
to the bottom of the stack. Now jump to the generic irq system using
that stack instead of the stack of the task running when the interrupt occured.
If an interrupt handler wants to run preemptible: Switch on interrupts and 
let the scheduler handle the rest. That way there isn't really 
any task switch, no wait queue or other "expensive" things to get the 
interrupts preemptible. The only thing I can think of being expensive is 
that you need to fix up the stack of the task running when the interrupt
occurred, such that a schedule() can pick it up cleanly and take the
interrupt thread in and out of the run queue - but that might even be 
defered to the case where it in fact is preempted.

By doing that we might get rid of hard-irq context all together, as 
the interrupt gets "threaded" already within the low level generic
code (although the interrupts will still be off at that point).

Esben


>
>> +		spin_mutex_to_spin(&i->lock);
>> +		break;
>> +	case IRQ_CAN_THREAD:
>> +		break;
>> +	case IRQ_TO_THREADED:
>> +		spin_mutex_to_mutex(&i->lock);
>> +		l = i->head;
>> +
>> +		do {
>> +			struct uart_8250_port *up;
>> +			up = list_entry(l, struct uart_8250_port, list);
>> +			spin_mutex_to_mutex(&up->port.lock);
>> +			l = l->next;
>> +		} while(l != i->head && l != NULL);
>> +		break;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>   /*
>>    * This is the serial driver's interrupt routine.
>>    *
>> @@ -1393,8 +1434,10 @@ static int serial_link_irq_chain(struct
>>   		i->head = &up->list;
>>   		spin_unlock_irq(&i->lock);
>>
>> -		ret = request_irq(up->port.irq, serial8250_interrupt,
>> -				  irq_flags, "serial", i);
>> +		ret = request_irq2(up->port.irq, serial8250_interrupt,
>> +				   irq_flags | SA_MUST_THREAD_RT,
>> +				   "serial", i,
>> +				   serial8250_change_context);
>>   		if (ret < 0)
>>   			serial_do_unlink(i, up);
>>   	}
>> Index: linux-2.6.16-rt23.spin_mutex/include/linux/serial_core.h
>> ===================================================================
>> --- linux-2.6.16-rt23.spin_mutex.orig/include/linux/serial_core.h
>> +++ linux-2.6.16-rt23.spin_mutex/include/linux/serial_core.h
>> @@ -206,7 +206,7 @@ struct uart_icount {
>>   typedef unsigned int __bitwise__ upf_t;
>>
>>   struct uart_port {
>> -	spinlock_t		lock;			/* port lock */
>> +	spin_mutex_t		lock;			/* port lock */
>>   	unsigned int		iobase;			/* in/out[bwl] */
>>   	unsigned char __iomem	*membase;		/* read/write[bwl] */
>>   	unsigned int		irq;			/* irq number */
>>
>> --
>> -
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
> -- 
> Steven Rostedt
> Senior Programmer
> Kihon Technologies
> (607)786-4830
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2006-06-04 21:49 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20060602165336.147812000@localhost>
2006-06-02 22:23 ` [patch 1/5] [PREEMPT_RT] Changing interrupt handlers from running in thread to hardirq and back runtime Esben Nielsen
2006-06-02 22:23 ` [patch 2/5] " Esben Nielsen
2006-06-03 20:21   ` Steven Rostedt
2006-06-04 17:33     ` Esben Nielsen
2006-06-02 22:23 ` [patch 3/5] " Esben Nielsen
2006-06-03 20:39   ` Steven Rostedt
2006-06-04 17:34     ` Esben Nielsen
2006-06-02 22:23 ` [patch 4/5] " Esben Nielsen
2006-06-03 21:30   ` Steven Rostedt
2006-06-04 22:50     ` Esben Nielsen
2006-06-02 22:23 ` [patch 5/5] " Esben Nielsen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox