public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [patch] PPC/PPC64 port of voluntary preempt patch
@ 2004-08-23 22:18 Scott Wood
  2004-08-24  6:14 ` [patch] voluntary-preempt-2.6.8.1-P9 Ingo Molnar
  2004-08-24 19:51 ` [patch] PPC/PPC64 port of voluntary preempt patch Scott Wood
  0 siblings, 2 replies; 122+ messages in thread
From: Scott Wood @ 2004-08-23 22:18 UTC (permalink / raw)
  To: mingo; +Cc: manas.saksena, linux-kernel

I have attached a port of the voluntary preempt patch to PPC and
PPC64.  The patch is against P7, but it applies against P8 as well.

I've tested it on a dual G5 Mac, both in uniprocessor and SMP.

Some notes on changes to the generic part of the patch/existing
generic code:

	I changed the generic code so that request_irq() works when
	called prior to the scheduler being ready to run.  Previously,
	if this happened, it'd try to spawn a thread anyway, and oops.
	
	I changed the no-op definitions of voluntary_resched() and such
	to be empty inline functions, rather than #defined to 0.  When 0
	is used, newer GCCs (I'm using 3.4.1) issue a warning about
	statements with no effect.  Due to this, I removed the redundant
	definition of voluntary_resched() from sched.h (it's also in
	kernel.h, which is always included by sched.h).  Does it need to
	be in kernel.h?
	
	The WARN_ON(system_state == SYSTEM_BOOTING) was flooding me
	with warnings; this stopped when I moved the setting of
	system_state before the init thread was started (it seems
	rather odd that one would not be able to schedule when creating
	a thread...).
	
	The latency tracker at one point used cpu_khz/1000, and at another
	used cpu_khz/1024.  Is there a reason why cycles_to_usecs isn't
	used in both places?
	
	It's not exactly related to PPC, but I changed 
	if (latency < preempt_max_latency) to use <= instead, as I was
	getting the same latency printed out over and over.
	
I haven't (yet) fixed any of the specific latencies I've found on the
Mac; this patch just supplies the generic functionality.

Signed-off-by: Scott Wood <scott.wood@timesys.com>

diff -urN vpP7/arch/ppc/Kconfig vpP7-ppc/arch/ppc/Kconfig
--- vpP7/arch/ppc/Kconfig	2004-08-17 15:22:33.000000000 -0400
+++ vpP7-ppc/arch/ppc/Kconfig	2004-08-23 13:57:14.000000000 -0400
@@ -808,6 +808,19 @@
 	  Say Y here if you are building a kernel for a desktop, embedded
 	  or real-time system.  Say N if you are unsure.
 
+config PREEMPT_VOLUNTARY
+	bool "Voluntary Kernel Preemption"
+	default y
+	help
+	  This option reduces the latency of the kernel by adding more
+	  "explicit preemption points" to the kernel code. These new
+	  preemption points have been selected to minimize the maximum
+	  latency of rescheduling, providing faster application reactions.
+
+	  Say Y here if you are building a kernel for a desktop system.
+	  Say N if you are unsure.
+
+
 config HIGHMEM
 	bool "High memory support"
 
diff -urN vpP7/arch/ppc/kernel/entry.S vpP7-ppc/arch/ppc/kernel/entry.S
--- vpP7/arch/ppc/kernel/entry.S	2004-08-17 15:22:33.000000000 -0400
+++ vpP7-ppc/arch/ppc/kernel/entry.S	2004-08-23 13:57:14.000000000 -0400
@@ -610,6 +610,11 @@
 
 /* N.B. the only way to get here is from the beq following ret_from_except. */
 resume_kernel:
+	lis	r9, kernel_preemption@ha
+	lwz	r9, kernel_preemption@l(r9)
+	cmpwi	r9, 0
+	bne	restore
+
 	/* check current_thread_info->preempt_count */
 	rlwinm	r9,r1,0,0,18
 	lwz	r0,TI_PREEMPT(r9)
diff -urN vpP7/arch/ppc/kernel/irq.c vpP7-ppc/arch/ppc/kernel/irq.c
--- vpP7/arch/ppc/kernel/irq.c	2004-08-17 15:22:33.000000000 -0400
+++ vpP7-ppc/arch/ppc/kernel/irq.c	2004-08-23 13:57:14.000000000 -0400
@@ -64,8 +64,6 @@
 void enable_irq(unsigned int irq_nr);
 void disable_irq(unsigned int irq_nr);
 
-static void register_irq_proc (unsigned int irq);
-
 #define MAXCOUNT 10000000
 
 irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
@@ -100,6 +98,7 @@
 	unsigned int i;
 	if ( mem_init_done )
 		return kmalloc(size,pri);
+		
 	for ( i = 0; i < IRQ_KMALLOC_ENTRIES ; i++ )
 		if ( ! ( cache_bitmask & (1<<i) ) )
 		{
@@ -121,107 +120,6 @@
 	kfree(ptr);
 }
 
-int
-setup_irq(unsigned int irq, struct irqaction * new)
-{
-	int shared = 0;
-	unsigned long flags;
-	struct irqaction *old, **p;
-	irq_desc_t *desc = irq_desc + irq;
-
-	/*
-	 * Some drivers like serial.c use request_irq() heavily,
-	 * so we have to be careful not to interfere with a
-	 * running system.
-	 */
-	if (new->flags & SA_SAMPLE_RANDOM) {
-		/*
-		 * This function might sleep, we want to call it first,
-		 * outside of the atomic block.
-		 * Yes, this might clear the entropy pool if the wrong
-		 * driver is attempted to be loaded, without actually
-		 * installing a new handler, but is this really a problem,
-		 * only the sysadmin is able to do this.
-		 */
-		rand_initialize_irq(irq);
-	}
-
-	/*
-	 * The following block of code has to be executed atomically
-	 */
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	if ((old = *p) != NULL) {
-		/* Can't share interrupts unless both agree to */
-		if (!(old->flags & new->flags & SA_SHIRQ)) {
-			spin_unlock_irqrestore(&desc->lock,flags);
-			return -EBUSY;
-		}
-
-		/* add new interrupt at end of irq queue */
-		do {
-			p = &old->next;
-			old = *p;
-		} while (old);
-		shared = 1;
-	}
-
-	*p = new;
-
-	if (!shared) {
-		desc->depth = 0;
-		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
-		if (desc->handler) {
-			if (desc->handler->startup)
-				desc->handler->startup(irq);
-			else if (desc->handler->enable)
-				desc->handler->enable(irq);
-		}
-	}
-	spin_unlock_irqrestore(&desc->lock,flags);
-
-	register_irq_proc(irq);
-	return 0;
-}
-
-void free_irq(unsigned int irq, void* dev_id)
-{
-	irq_desc_t *desc;
-	struct irqaction **p;
-	unsigned long flags;
-
-	desc = irq_desc + irq;
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	for (;;) {
-		struct irqaction * action = *p;
-		if (action) {
-			struct irqaction **pp = p;
-			p = &action->next;
-			if (action->dev_id != dev_id)
-				continue;
-
-			/* Found it - now remove it from the list of entries */
-			*pp = action->next;
-			if (!desc->action) {
-				desc->status |= IRQ_DISABLED;
-				mask_irq(irq);
-			}
-			spin_unlock_irqrestore(&desc->lock,flags);
-
-			synchronize_irq(irq);
-			irq_kfree(action);
-			return;
-		}
-		printk("Trying to free free IRQ%d\n",irq);
-		spin_unlock_irqrestore(&desc->lock,flags);
-		break;
-	}
-	return;
-}
-
-EXPORT_SYMBOL(free_irq);
-
 int request_irq(unsigned int irq,
 	irqreturn_t (*handler)(int, void *, struct pt_regs *),
 	unsigned long irqflags, const char * devname, void *dev_id)
@@ -262,95 +160,6 @@
 
 EXPORT_SYMBOL(request_irq);
 
-/*
- * Generic enable/disable code: this just calls
- * down into the PIC-specific version for the actual
- * hardware disable after having gotten the irq
- * controller lock.
- */
-
-/**
- *	disable_irq_nosync - disable an irq without waiting
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line. Disables of an interrupt
- *	stack. Unlike disable_irq(), this function does not ensure existing
- *	instances of the IRQ handler have completed before returning.
- *
- *	This function may be called from IRQ context.
- */
-
-void disable_irq_nosync(unsigned int irq)
-{
-	irq_desc_t *desc = irq_desc + irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	if (!desc->depth++) {
-		if (!(desc->status & IRQ_PER_CPU))
-			desc->status |= IRQ_DISABLED;
-		mask_irq(irq);
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
-/**
- *	disable_irq - disable an irq and wait for completion
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line. Disables of an interrupt
- *	stack. That is for two disables you need two enables. This
- *	function waits for any pending IRQ handlers for this interrupt
- *	to complete before returning. If you use this function while
- *	holding a resource the IRQ handler may need you will deadlock.
- *
- *	This function may be called - with care - from IRQ context.
- */
-
-void disable_irq(unsigned int irq)
-{
-	irq_desc_t *desc = irq_desc + irq;
-	disable_irq_nosync(irq);
-	if (desc->action)
-		synchronize_irq(irq);
-}
-
-/**
- *	enable_irq - enable interrupt handling on an irq
- *	@irq: Interrupt to enable
- *
- *	Re-enables the processing of interrupts on this IRQ line
- *	providing no disable_irq calls are now in effect.
- *
- *	This function may be called from IRQ context.
- */
-
-void enable_irq(unsigned int irq)
-{
-	irq_desc_t *desc = irq_desc + irq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	switch (desc->depth) {
-	case 1: {
-		unsigned int status = desc->status & ~IRQ_DISABLED;
-		desc->status = status;
-		if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
-			desc->status = status | IRQ_REPLAY;
-			hw_resend_irq(desc->handler,irq);
-		}
-		unmask_irq(irq);
-		/* fall-through */
-	}
-	default:
-		desc->depth--;
-		break;
-	case 0:
-		printk("enable_irq(%u) unbalanced\n", irq);
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
 int show_interrupts(struct seq_file *p, void *v)
 {
 	int i = *(loff_t *) v, j;
@@ -410,24 +219,6 @@
 	return 0;
 }
 
-static inline void
-handle_irq_event(int irq, struct pt_regs *regs, struct irqaction *action)
-{
-	int status = 0;
-
-	if (!(action->flags & SA_INTERRUPT))
-		local_irq_enable();
-
-	do {
-		status |= action->flags;
-		action->handler(irq, action->dev_id, regs);
-		action = action->next;
-	} while (action);
-	if (status & SA_SAMPLE_RANDOM)
-		add_interrupt_randomness(irq);
-	local_irq_disable();
-}
-
 /*
  * Eventually, this should take an array of interrupts and an array size
  * so it can dispatch multiple interrupts.
@@ -482,6 +273,8 @@
 	if (unlikely(!action))
 		goto out;
 
+	if (generic_redirect_hardirq(desc))
+		goto out_no_end;
 
 	/*
 	 * Edge triggered interrupts need to remember
@@ -494,10 +287,14 @@
 	 * SMP environment.
 	 */
 	for (;;) {
+		irqreturn_t action_ret;
+	
 		spin_unlock(&desc->lock);
-		handle_irq_event(irq, regs, action);
+		action_ret = generic_handle_IRQ_event(irq, regs, action);
 		spin_lock(&desc->lock);
-
+		
+		if (!noirqdebug)
+			generic_note_interrupt(irq, desc, action_ret);
 		if (likely(!(desc->status & IRQ_PENDING)))
 			break;
 		desc->status &= ~IRQ_PENDING;
@@ -514,13 +311,15 @@
 		else if (irq_desc[irq].handler->enable)
 			irq_desc[irq].handler->enable(irq);
 	}
+
+out_no_end:
 	spin_unlock(&desc->lock);
 }
 
 void do_IRQ(struct pt_regs *regs)
 {
 	int irq, first = 1;
-        irq_enter();
+	irq_enter();
 
 	/*
 	 * Every platform is required to implement ppc_md.get_irq.
@@ -537,7 +336,7 @@
 	if (irq != -2 && first)
 		/* That's not SMP safe ... but who cares ? */
 		ppc_spurious_interrupts++;
-        irq_exit();
+	irq_exit();
 }
 
 unsigned long probe_irq_on (void)
@@ -559,148 +358,6 @@
 	return 0;
 }
 
-#ifdef CONFIG_SMP
-void synchronize_irq(unsigned int irq)
-{
-	while (irq_desc[irq].status & IRQ_INPROGRESS)
-		barrier();
-}
-#endif /* CONFIG_SMP */
-
-static struct proc_dir_entry *root_irq_dir;
-static struct proc_dir_entry *irq_dir[NR_IRQS];
-static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
-
-#ifdef CONFIG_IRQ_ALL_CPUS
-#define DEFAULT_CPU_AFFINITY CPU_MASK_ALL
-#else
-#define DEFAULT_CPU_AFFINITY cpumask_of_cpu(0)
-#endif
-
-cpumask_t irq_affinity [NR_IRQS];
-
-static int irq_affinity_read_proc (char *page, char **start, off_t off,
-			int count, int *eof, void *data)
-{
-	int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]);
-	if (count - len < 2)
-		return -EINVAL;
-	len += sprintf(page + len, "\n");
-	return len;
-}
-
-static int irq_affinity_write_proc (struct file *file, const char __user *buffer,
-					unsigned long count, void *data)
-{
-	int irq = (int) data, full_count = count, err;
-	cpumask_t new_value, tmp;
-
-	if (!irq_desc[irq].handler->set_affinity)
-		return -EIO;
-
-	err = cpumask_parse(buffer, count, new_value);
-
-	/*
-	 * Do not allow disabling IRQs completely - it's a too easy
-	 * way to make the system unusable accidentally :-) At least
-	 * one online CPU still has to be targeted.
-	 *
-	 * We assume a 1-1 logical<->physical cpu mapping here.  If
-	 * we assume that the cpu indices in /proc/irq/../smp_affinity
-	 * are actually logical cpu #'s then we have no problem.
-	 *  -- Cort <cort@fsmlabs.com>
-	 */
-	cpus_and(tmp, new_value, cpu_online_map);
-	if (cpus_empty(tmp))
-		return -EINVAL;
-
-	irq_affinity[irq] = new_value;
-	irq_desc[irq].handler->set_affinity(irq, new_value);
-
-	return full_count;
-}
-
-static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
-			int count, int *eof, void *data)
-{
-	int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
-	if (count - len < 2)
-		return -EINVAL;
-	len += sprintf(page + len, "\n");
-	return len;
-}
-
-static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffer,
-					unsigned long count, void *data)
-{
-	int err;
-	int full_count = count;
-	cpumask_t *mask = (cpumask_t *)data;
-	cpumask_t new_value;
-
-	err = cpumask_parse(buffer, count, new_value);
-	if (err)
-		return err;
-
-	*mask = new_value;
-	return full_count;
-}
-
-#define MAX_NAMELEN 10
-
-static void register_irq_proc (unsigned int irq)
-{
-	struct proc_dir_entry *entry;
-	char name [MAX_NAMELEN];
-
-	if (!root_irq_dir || (irq_desc[irq].handler == NULL) || irq_dir[irq])
-		return;
-
-	memset(name, 0, MAX_NAMELEN);
-	sprintf(name, "%d", irq);
-
-	/* create /proc/irq/1234 */
-	irq_dir[irq] = proc_mkdir(name, root_irq_dir);
-
-	/* create /proc/irq/1234/smp_affinity */
-	entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
-
-	entry->nlink = 1;
-	entry->data = (void *)irq;
-	entry->read_proc = irq_affinity_read_proc;
-	entry->write_proc = irq_affinity_write_proc;
-
-	smp_affinity_entry[irq] = entry;
-}
-
-unsigned long prof_cpu_mask = -1;
-
-void init_irq_proc (void)
-{
-	struct proc_dir_entry *entry;
-	int i;
-
-	/* create /proc/irq */
-	root_irq_dir = proc_mkdir("irq", NULL);
-
-	/* create /proc/irq/prof_cpu_mask */
-	entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
-
-	entry->nlink = 1;
-	entry->data = (void *)&prof_cpu_mask;
-	entry->read_proc = prof_cpu_mask_read_proc;
-	entry->write_proc = prof_cpu_mask_write_proc;
-
-	/*
-	 * Create entries for all existing IRQs.
-	 */
-	for (i = 0; i < NR_IRQS; i++) {
-		if (irq_desc[i].handler == NULL)
-			continue;
-		register_irq_proc(i);
-	}
-}
-
 irqreturn_t no_action(int irq, void *dev, struct pt_regs *regs)
 {
 	return IRQ_NONE;
@@ -708,10 +365,7 @@
 
 void __init init_IRQ(void)
 {
-	int i;
-
-	for (i = 0; i < NR_IRQS; ++i)
-		irq_affinity[i] = DEFAULT_CPU_AFFINITY;
-
 	ppc_md.init_IRQ();
 }
+
+struct hw_interrupt_type no_irq_type;
diff -urN vpP7/arch/ppc/kernel/misc.S vpP7-ppc/arch/ppc/kernel/misc.S
--- vpP7/arch/ppc/kernel/misc.S	2004-08-17 15:22:33.000000000 -0400
+++ vpP7-ppc/arch/ppc/kernel/misc.S	2004-08-23 16:07:29.000000000 -0400
@@ -1165,6 +1165,60 @@
 _GLOBAL(__main)
 	blr
 
+#ifdef CONFIG_LATENCY_TRACE
+
+_GLOBAL(_mcount)
+	stwu	r1, -48(r1)
+
+	stw	r3, 8(r1)
+	stw	r4, 12(r1)
+	stw	r5, 16(r1)
+	stw	r6, 20(r1)
+	stw	r7, 24(r1)
+	stw	r8, 28(r1)
+	stw	r9, 32(r1)
+	stw	r10, 36(r1)
+
+	mflr	r3
+	stw	r3, 40(r1)
+
+	mfcr	r0
+	stw	r0, 44(r1)
+
+	lwz	r4, 52(r1)
+
+	// Don't call do_mcount if we haven't relocated to 0xc0000000 yet.
+	// This assumes that the ordinary load address is below
+	// 0x80000000.
+
+	andis.	r0, r3, 0x8000
+	beq-	mcount_out
+	bl	do_mcount
+mcount_out:
+
+	lwz	r3, 8(r1)
+	lwz	r4, 12(r1)
+	lwz	r5, 16(r1)
+	lwz	r6, 20(r1)
+	lwz	r7, 24(r1)
+	lwz	r8, 28(r1)
+	lwz	r9, 32(r1)
+	lwz	r10, 36(r1)
+
+	lwz	r0, 40(r1)
+	mtctr	r0
+
+	lwz	r0, 44(r1)
+	mtcr	r0
+
+	lwz	r0, 52(r1)
+	mtlr	r0
+
+	addi	r1, r1, 48
+	bctr
+
+#endif
+
 #define SYSCALL(name) \
 _GLOBAL(name) \
 	li	r0,__NR_##name; \
diff -urN vpP7/arch/ppc/kernel/ppc_ksyms.c vpP7-ppc/arch/ppc/kernel/ppc_ksyms.c
--- vpP7/arch/ppc/kernel/ppc_ksyms.c	2004-08-17 15:22:33.000000000 -0400
+++ vpP7-ppc/arch/ppc/kernel/ppc_ksyms.c	2004-08-23 13:57:14.000000000 -0400
@@ -84,9 +84,6 @@
 EXPORT_SYMBOL(sys_sigreturn);
 EXPORT_SYMBOL(ppc_n_lost_interrupts);
 EXPORT_SYMBOL(ppc_lost_interrupts);
-EXPORT_SYMBOL(enable_irq);
-EXPORT_SYMBOL(disable_irq);
-EXPORT_SYMBOL(disable_irq_nosync);
 EXPORT_SYMBOL(probe_irq_mask);
 
 EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
@@ -205,7 +202,6 @@
 #ifdef CONFIG_SMP
 EXPORT_SYMBOL(smp_call_function);
 EXPORT_SYMBOL(smp_hw_index);
-EXPORT_SYMBOL(synchronize_irq);
 #endif
 
 EXPORT_SYMBOL(ppc_md);
diff -urN vpP7/arch/ppc/platforms/pmac_pic.c vpP7-ppc/arch/ppc/platforms/pmac_pic.c
--- vpP7/arch/ppc/platforms/pmac_pic.c	2004-08-17 15:22:33.000000000 -0400
+++ vpP7-ppc/arch/ppc/platforms/pmac_pic.c	2004-08-23 13:57:14.000000000 -0400
@@ -440,8 +440,9 @@
 				OpenPIC_InitSenses = senses;
 				OpenPIC_NumInitSenses = 128;
 				openpic2_init(PMAC_OPENPIC2_OFFSET);
-				if (request_irq(irqctrler2->intrs[0].line, k2u3_action, 0,
-						"U3->K2 Cascade", NULL))
+				if (request_irq(irqctrler2->intrs[0].line, k2u3_action,
+				                SA_NODELAY | SA_INTERRUPT,
+				                "U3->K2 Cascade", NULL))
 					printk("Unable to get OpenPIC IRQ for cascade\n");
 			}
 #endif /* CONFIG_POWER4 */
@@ -455,7 +456,7 @@
 				if (pswitch && pswitch->n_intrs) {
 					nmi_irq = pswitch->intrs[0].line;
 					openpic_init_nmi_irq(nmi_irq);
-					request_irq(nmi_irq, xmon_irq, 0,
+					request_irq(nmi_irq, xmon_irq, SA_NODELAY,
 						    "NMI - XMON", NULL);
 				}
 			}
@@ -553,7 +554,7 @@
 			(int)irq_cascade);
 		for ( i = max_real_irqs ; i < max_irqs ; i++ )
 			irq_desc[i].handler = &gatwick_pic;
-		request_irq( irq_cascade, gatwick_action, SA_INTERRUPT,
+		request_irq( irq_cascade, gatwick_action, SA_INTERRUPT | SA_NODELAY,
 			     "cascade", NULL );
 	}
 	printk("System has %d possible interrupts\n", max_irqs);
@@ -562,7 +563,7 @@
 			max_real_irqs);
 
 #ifdef CONFIG_XMON
-	request_irq(20, xmon_irq, 0, "NMI - XMON", NULL);
+	request_irq(20, xmon_irq, SA_NODELAY, "NMI - XMON", NULL);
 #endif	/* CONFIG_XMON */
 }
 
diff -urN vpP7/arch/ppc/platforms/sbc82xx.c vpP7-ppc/arch/ppc/platforms/sbc82xx.c
--- vpP7/arch/ppc/platforms/sbc82xx.c	2004-08-17 15:22:34.000000000 -0400
+++ vpP7-ppc/arch/ppc/platforms/sbc82xx.c	2004-08-23 13:57:14.000000000 -0400
@@ -212,7 +212,7 @@
 	sbc82xx_i8259_map[1] = sbc82xx_i8259_mask; /* Set interrupt mask */
 
 	/* Request cascade IRQ */
-	if (request_irq(SIU_INT_IRQ6, sbc82xx_i8259_demux, SA_INTERRUPT,
+	if (request_irq(SIU_INT_IRQ6, sbc82xx_i8259_demux, SA_INTERRUPT | SA_NODELAY,
 			"i8259 demux", 0)) {
 		printk("Installation of i8259 IRQ demultiplexer failed.\n");
 	}
diff -urN vpP7/arch/ppc/syslib/i8259.c vpP7-ppc/arch/ppc/syslib/i8259.c
--- vpP7/arch/ppc/syslib/i8259.c	2004-06-16 01:19:22.000000000 -0400
+++ vpP7-ppc/arch/ppc/syslib/i8259.c	2004-08-23 13:57:14.000000000 -0400
@@ -185,7 +185,7 @@
 	spin_unlock_irqrestore(&i8259_lock, flags);
 
 	/* reserve our resources */
-	request_irq( i8259_pic_irq_offset + 2, no_action, SA_INTERRUPT,
+	request_irq( i8259_pic_irq_offset + 2, no_action, SA_INTERRUPT | SA_NODELAY,
 				"82c59 secondary cascade", NULL );
 	request_resource(&ioport_resource, &pic1_iores);
 	request_resource(&ioport_resource, &pic2_iores);
diff -urN vpP7/arch/ppc/syslib/m8xx_setup.c vpP7-ppc/arch/ppc/syslib/m8xx_setup.c
--- vpP7/arch/ppc/syslib/m8xx_setup.c	2004-06-16 01:19:22.000000000 -0400
+++ vpP7-ppc/arch/ppc/syslib/m8xx_setup.c	2004-08-23 13:57:14.000000000 -0400
@@ -281,7 +281,8 @@
                 irq_desc[i].handler = &i8259_pic;
         i8259_pic.irq_offset = NR_SIU_INTS;
         i8259_init();
-        request_8xxirq(ISA_BRIDGE_INT, mbx_i8259_action, 0, "8259 cascade", NULL);
+        request_8xxirq(ISA_BRIDGE_INT, mbx_i8259_action,
+                       SA_INTERRUPT | SA_NODELAY, "8259 cascade", NULL);
         enable_irq(ISA_BRIDGE_INT);
 #endif
 }
diff -urN vpP7/arch/ppc/syslib/open_pic.c vpP7-ppc/arch/ppc/syslib/open_pic.c
--- vpP7/arch/ppc/syslib/open_pic.c	2004-08-17 15:22:34.000000000 -0400
+++ vpP7-ppc/arch/ppc/syslib/open_pic.c	2004-08-23 13:57:14.000000000 -0400
@@ -580,16 +580,16 @@
 
 	/* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */
 	request_irq(OPENPIC_VEC_IPI+open_pic_irq_offset,
-		    openpic_ipi_action, SA_INTERRUPT,
+		    openpic_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		    "IPI0 (call function)", NULL);
 	request_irq(OPENPIC_VEC_IPI+open_pic_irq_offset+1,
-		    openpic_ipi_action, SA_INTERRUPT,
+		    openpic_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		    "IPI1 (reschedule)", NULL);
 	request_irq(OPENPIC_VEC_IPI+open_pic_irq_offset+2,
-		    openpic_ipi_action, SA_INTERRUPT,
+		    openpic_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		    "IPI2 (invalidate tlb)", NULL);
 	request_irq(OPENPIC_VEC_IPI+open_pic_irq_offset+3,
-		    openpic_ipi_action, SA_INTERRUPT,
+		    openpic_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		    "IPI3 (xmon break)", NULL);
 
 	for ( i = 0; i < OPENPIC_NUM_IPI ; i++ )
@@ -687,7 +687,7 @@
 {
 	openpic_cascade_irq = irq;
 	openpic_cascade_fn = cascade_fn;
-	if (request_irq(irq, no_action, SA_INTERRUPT, name, NULL))
+	if (request_irq(irq, no_action, SA_INTERRUPT | SA_NODELAY, name, NULL))
 		printk("Unable to get OpenPIC IRQ %d for cascade\n",
 				irq - open_pic_irq_offset);
 }
@@ -798,6 +798,10 @@
 }
 #endif /* notused */
 
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+#define __SLOW_VERSION__
+#endif
+
 /* No spinlocks, should not be necessary with the OpenPIC
  * (1 register = 1 interrupt and we have the desc lock).
  */
diff -urN vpP7/arch/ppc64/Kconfig vpP7-ppc/arch/ppc64/Kconfig
--- vpP7/arch/ppc64/Kconfig	2004-08-17 15:22:34.000000000 -0400
+++ vpP7-ppc/arch/ppc64/Kconfig	2004-08-23 14:39:44.000000000 -0400
@@ -206,6 +206,18 @@
 	  Say Y here if you are building a kernel for a desktop, embedded
 	  or real-time system.  Say N if you are unsure.
 
+config PREEMPT_VOLUNTARY
+	bool "Voluntary Kernel Preemption"
+	default y
+	help
+	  This option reduces the latency of the kernel by adding more
+	  "explicit preemption points" to the kernel code. These new
+	  preemption points have been selected to minimize the maximum
+	  latency of rescheduling, providing faster application reactions.
+
+	  Say Y here if you are building a kernel for a desktop system.
+	  Say N if you are unsure.
+
 config MSCHUNKS
 	bool
 	depends on PPC_ISERIES
diff -urN vpP7/arch/ppc64/kernel/entry.S vpP7-ppc/arch/ppc64/kernel/entry.S
--- vpP7/arch/ppc64/kernel/entry.S	2004-08-17 15:22:34.000000000 -0400
+++ vpP7-ppc/arch/ppc64/kernel/entry.S	2004-08-23 13:57:14.000000000 -0400
@@ -548,6 +548,12 @@
 #ifdef CONFIG_PREEMPT
 	andi.	r0,r3,MSR_PR	/* Returning to user mode? */
 	bne	user_work
+
+	LOADBASE(r8, kernel_preemption)
+	lwz	r8, kernel_preemption@l(r8)
+	cmpwi	r8, 0
+	bne	restore
+
 	/* Check that preempt_count() == 0 and interrupts are enabled */
 	lwz	r8,TI_PREEMPT(r9)
 	cmpwi	cr1,r8,0
diff -urN vpP7/arch/ppc64/kernel/i8259.c vpP7-ppc/arch/ppc64/kernel/i8259.c
--- vpP7/arch/ppc64/kernel/i8259.c	2004-06-16 01:20:26.000000000 -0400
+++ vpP7-ppc/arch/ppc64/kernel/i8259.c	2004-08-23 13:57:14.000000000 -0400
@@ -160,7 +160,7 @@
         outb(cached_A1, 0xA1);
         outb(cached_21, 0x21);
 	spin_unlock_irqrestore(&i8259_lock, flags);
-        request_irq( i8259_pic_irq_offset + 2, no_action, SA_INTERRUPT,
+        request_irq( i8259_pic_irq_offset + 2, no_action, SA_INTERRUPT | SA_NODELAY,
                      "82c59 secondary cascade", NULL );
         
 }
diff -urN vpP7/arch/ppc64/kernel/irq.c vpP7-ppc/arch/ppc64/kernel/irq.c
--- vpP7/arch/ppc64/kernel/irq.c	2004-08-17 15:22:34.000000000 -0400
+++ vpP7-ppc/arch/ppc64/kernel/irq.c	2004-08-23 13:57:14.000000000 -0400
@@ -59,8 +59,6 @@
 extern void iSeries_smp_message_recv( struct pt_regs * );
 #endif
 
-static void register_irq_proc (unsigned int irq);
-
 irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
 	[0 ... NR_IRQS-1] = {
 		.lock = SPIN_LOCK_UNLOCKED
@@ -71,78 +69,6 @@
 int ppc_spurious_interrupts;
 unsigned long lpevent_count;
 
-int
-setup_irq(unsigned int irq, struct irqaction * new)
-{
-	int shared = 0;
-	unsigned long flags;
-	struct irqaction *old, **p;
-	irq_desc_t *desc = get_irq_desc(irq);
-
-	/*
-	 * Some drivers like serial.c use request_irq() heavily,
-	 * so we have to be careful not to interfere with a
-	 * running system.
-	 */
-	if (new->flags & SA_SAMPLE_RANDOM) {
-		/*
-		 * This function might sleep, we want to call it first,
-		 * outside of the atomic block.
-		 * Yes, this might clear the entropy pool if the wrong
-		 * driver is attempted to be loaded, without actually
-		 * installing a new handler, but is this really a problem,
-		 * only the sysadmin is able to do this.
-		 */
-		rand_initialize_irq(irq);
-	}
-
-	/*
-	 * The following block of code has to be executed atomically
-	 */
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	if ((old = *p) != NULL) {
-		/* Can't share interrupts unless both agree to */
-		if (!(old->flags & new->flags & SA_SHIRQ)) {
-			spin_unlock_irqrestore(&desc->lock,flags);
-			return -EBUSY;
-		}
-
-		/* add new interrupt at end of irq queue */
-		do {
-			p = &old->next;
-			old = *p;
-		} while (old);
-		shared = 1;
-	}
-
-	*p = new;
-
-	if (!shared) {
-		desc->depth = 0;
-		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS);
-		if (desc->handler && desc->handler->startup)
-			desc->handler->startup(irq);
-		unmask_irq(irq);
-	}
-	spin_unlock_irqrestore(&desc->lock,flags);
-
-	register_irq_proc(irq);
-	return 0;
-}
-
-#ifdef CONFIG_SMP
-
-inline void synchronize_irq(unsigned int irq)
-{
-	while (get_irq_desc(irq)->status & IRQ_INPROGRESS)
-		cpu_relax();
-}
-
-EXPORT_SYMBOL(synchronize_irq);
-
-#endif /* CONFIG_SMP */
-
 int request_irq(unsigned int irq,
 	irqreturn_t (*handler)(int, void *, struct pt_regs *),
 	unsigned long irqflags, const char * devname, void *dev_id)
@@ -152,8 +78,10 @@
 
 	if (irq >= NR_IRQS)
 		return -EINVAL;
-	if (!handler)
-		return -EINVAL;
+	if (!handler) {
+		free_irq(irq, dev_id);
+		return 0;
+	}
 
 	action = (struct irqaction *)
 		kmalloc(sizeof(struct irqaction), GFP_KERNEL);
@@ -178,140 +106,6 @@
 
 EXPORT_SYMBOL(request_irq);
 
-void free_irq(unsigned int irq, void *dev_id)
-{
-	irq_desc_t *desc = get_irq_desc(irq);
-	struct irqaction **p;
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock,flags);
-	p = &desc->action;
-	for (;;) {
-		struct irqaction * action = *p;
-		if (action) {
-			struct irqaction **pp = p;
-			p = &action->next;
-			if (action->dev_id != dev_id)
-				continue;
-
-			/* Found it - now remove it from the list of entries */
-			*pp = action->next;
-			if (!desc->action) {
-				desc->status |= IRQ_DISABLED;
-				mask_irq(irq);
-			}
-			spin_unlock_irqrestore(&desc->lock,flags);
-
-			/* Wait to make sure it's not being used on another CPU */
-			synchronize_irq(irq);
-			kfree(action);
-			return;
-		}
-		printk("Trying to free free IRQ%d\n",irq);
-		spin_unlock_irqrestore(&desc->lock,flags);
-		break;
-	}
-	return;
-}
-
-EXPORT_SYMBOL(free_irq);
-
-/*
- * Generic enable/disable code: this just calls
- * down into the PIC-specific version for the actual
- * hardware disable after having gotten the irq
- * controller lock. 
- */
- 
-/**
- *	disable_irq_nosync - disable an irq without waiting
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line. Disables of an interrupt
- *	stack. Unlike disable_irq(), this function does not ensure existing
- *	instances of the IRQ handler have completed before returning.
- *
- *	This function may be called from IRQ context.
- */
- 
-inline void disable_irq_nosync(unsigned int irq)
-{
-	irq_desc_t *desc = get_irq_desc(irq);
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	if (!desc->depth++) {
-		if (!(desc->status & IRQ_PER_CPU))
-			desc->status |= IRQ_DISABLED;
-		mask_irq(irq);
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
-EXPORT_SYMBOL(disable_irq_nosync);
-
-/**
- *	disable_irq - disable an irq and wait for completion
- *	@irq: Interrupt to disable
- *
- *	Disable the selected interrupt line. Disables of an interrupt
- *	stack. That is for two disables you need two enables. This
- *	function waits for any pending IRQ handlers for this interrupt
- *	to complete before returning. If you use this function while
- *	holding a resource the IRQ handler may need you will deadlock.
- *
- *	This function may be called - with care - from IRQ context.
- */
- 
-void disable_irq(unsigned int irq)
-{
-	irq_desc_t *desc = get_irq_desc(irq);
-	disable_irq_nosync(irq);
-	if (desc->action)
-		synchronize_irq(irq);
-}
-
-EXPORT_SYMBOL(disable_irq);
-
-/**
- *	enable_irq - enable interrupt handling on an irq
- *	@irq: Interrupt to enable
- *
- *	Re-enables the processing of interrupts on this IRQ line
- *	providing no disable_irq calls are now in effect.
- *
- *	This function may be called from IRQ context.
- */
- 
-void enable_irq(unsigned int irq)
-{
-	irq_desc_t *desc = get_irq_desc(irq);
-	unsigned long flags;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	switch (desc->depth) {
-	case 1: {
-		unsigned int status = desc->status & ~IRQ_DISABLED;
-		desc->status = status;
-		if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
-			desc->status = status | IRQ_REPLAY;
-			hw_resend_irq(desc->handler,irq);
-		}
-		unmask_irq(irq);
-		/* fall-through */
-	}
-	default:
-		desc->depth--;
-		break;
-	case 0:
-		printk("enable_irq(%u) unbalanced from %p\n", irq,
-		       __builtin_return_address(0));
-	}
-	spin_unlock_irqrestore(&desc->lock, flags);
-}
-
-EXPORT_SYMBOL(enable_irq);
-
 int show_interrupts(struct seq_file *p, void *v)
 {
 	int i = *(loff_t *) v, j;
@@ -359,106 +153,6 @@
 	return 0;
 }
 
-int handle_irq_event(int irq, struct pt_regs *regs, struct irqaction *action)
-{
-	int status = 0;
-	int retval = 0;
-
-	if (!(action->flags & SA_INTERRUPT))
-		local_irq_enable();
-
-	do {
-		status |= action->flags;
-		retval |= action->handler(irq, action->dev_id, regs);
-		action = action->next;
-	} while (action);
-	if (status & SA_SAMPLE_RANDOM)
-		add_interrupt_randomness(irq);
-	local_irq_disable();
-	return retval;
-}
-
-static void __report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
-{
-	struct irqaction *action;
-
-	if (action_ret != IRQ_HANDLED && action_ret != IRQ_NONE) {
-		printk(KERN_ERR "irq event %d: bogus return value %x\n",
-				irq, action_ret);
-	} else {
-		printk(KERN_ERR "irq %d: nobody cared!\n", irq);
-	}
-	dump_stack();
-	printk(KERN_ERR "handlers:\n");
-	action = desc->action;
-	do {
-		printk(KERN_ERR "[<%p>]", action->handler);
-		print_symbol(" (%s)",
-			(unsigned long)action->handler);
-		printk("\n");
-		action = action->next;
-	} while (action);
-}
-
-static void report_bad_irq(int irq, irq_desc_t *desc, irqreturn_t action_ret)
-{
-	static int count = 100;
-
-	if (count) {
-		count--;
-		__report_bad_irq(irq, desc, action_ret);
-	}
-}
-
-static int noirqdebug;
-
-static int __init noirqdebug_setup(char *str)
-{
-	noirqdebug = 1;
-	printk("IRQ lockup detection disabled\n");
-	return 1;
-}
-
-__setup("noirqdebug", noirqdebug_setup);
-
-/*
- * If 99,900 of the previous 100,000 interrupts have not been handled then
- * assume that the IRQ is stuck in some manner.  Drop a diagnostic and try to
- * turn the IRQ off.
- *
- * (The other 100-of-100,000 interrupts may have been a correctly-functioning
- *  device sharing an IRQ with the failing one)
- *
- * Called under desc->lock
- */
-static void note_interrupt(int irq, irq_desc_t *desc, irqreturn_t action_ret)
-{
-	if (action_ret != IRQ_HANDLED) {
-		desc->irqs_unhandled++;
-		if (action_ret != IRQ_NONE)
-			report_bad_irq(irq, desc, action_ret);
-	}
-
-	desc->irq_count++;
-	if (desc->irq_count < 100000)
-		return;
-
-	desc->irq_count = 0;
-	if (desc->irqs_unhandled > 99900) {
-		/*
-		 * The interrupt is stuck
-		 */
-		__report_bad_irq(irq, desc, action_ret);
-		/*
-		 * Now kill the IRQ
-		 */
-		printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
-		desc->status |= IRQ_DISABLED;
-		desc->handler->disable(irq);
-	}
-	desc->irqs_unhandled = 0;
-}
-
 /*
  * Eventually, this should take an array of interrupts and an array size
  * so it can dispatch multiple interrupts.
@@ -479,7 +173,7 @@
 	if (desc->status & IRQ_PER_CPU) {
 		/* no locking required for CPU-local interrupts: */
 		ack_irq(irq);
-		action_ret = handle_irq_event(irq, regs, desc->action);
+		action_ret = generic_handle_IRQ_event(irq, regs, desc->action);
 		desc->handler->end(irq);
 		return;
 	}
@@ -527,6 +221,9 @@
 	if (unlikely(!action))
 		goto out;
 
+	if (generic_redirect_hardirq(desc))
+		goto out_no_end;
+
 	/*
 	 * Edge triggered interrupts need to remember
 	 * pending events.
@@ -553,11 +250,11 @@
 				set_bits(irqtp->flags, &curtp->flags);
 		} else
 #endif
-			action_ret = handle_irq_event(irq, regs, action);
+			action_ret = generic_handle_IRQ_event(irq, regs, action);
 
 		spin_lock(&desc->lock);
 		if (!noirqdebug)
-			note_interrupt(irq, desc, action_ret);
+			generic_note_interrupt(irq, desc, action_ret);
 		if (likely(!(desc->status & IRQ_PENDING)))
 			break;
 		desc->status &= ~IRQ_PENDING;
@@ -574,6 +271,8 @@
 		else if (desc->handler->enable)
 			desc->handler->enable(irq);
 	}
+
+out_no_end:
 	spin_unlock(&desc->lock);
 }
 
@@ -687,174 +386,6 @@
 	irq_ctx_init();
 }
 
-static struct proc_dir_entry * root_irq_dir;
-static struct proc_dir_entry * irq_dir [NR_IRQS];
-static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
-
-/* Protected by get_irq_desc(irq)->lock. */
-#ifdef CONFIG_IRQ_ALL_CPUS
-cpumask_t irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
-#else  /* CONFIG_IRQ_ALL_CPUS */
-cpumask_t irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_NONE };
-#endif /* CONFIG_IRQ_ALL_CPUS */
-
-static int irq_affinity_read_proc (char *page, char **start, off_t off,
-			int count, int *eof, void *data)
-{
-	int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]);
-	if (count - len < 2)
-		return -EINVAL;
-	len += sprintf(page + len, "\n");
-	return len;
-}
-
-static int irq_affinity_write_proc (struct file *file, const char __user *buffer,
-					unsigned long count, void *data)
-{
-	unsigned int irq = (long)data;
-	irq_desc_t *desc = get_irq_desc(irq);
-	int ret;
-	cpumask_t new_value, tmp;
-
-	if (!desc->handler->set_affinity)
-		return -EIO;
-
-	ret = cpumask_parse(buffer, count, new_value);
-	if (ret != 0)
-		return ret;
-
-	/*
-	 * We check for CPU_MASK_ALL in xics to send irqs to all cpus.
-	 * In some cases CPU_MASK_ALL is smaller than the cpumask (eg
-	 * NR_CPUS == 32 and cpumask is a long), so we mask it here to
-	 * be consistent.
-	 */
-	cpus_and(new_value, new_value, CPU_MASK_ALL);
-
-	/*
-	 * Grab lock here so cpu_online_map can't change, and also
-	 * protect irq_affinity[].
-	 */
-	spin_lock(&desc->lock);
-
-	/*
-	 * Do not allow disabling IRQs completely - it's a too easy
-	 * way to make the system unusable accidentally :-) At least
-	 * one online CPU still has to be targeted.
-	 */
-	cpus_and(tmp, new_value, cpu_online_map);
-	if (cpus_empty(tmp)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	irq_affinity[irq] = new_value;
-	desc->handler->set_affinity(irq, new_value);
-	ret = count;
-
-out:
-	spin_unlock(&desc->lock);
-	return ret;
-}
-
-static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
-			int count, int *eof, void *data)
-{
-	int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
-	if (count - len < 2)
-		return -EINVAL;
-	len += sprintf(page + len, "\n");
-	return len;
-}
-
-static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffer,
-					unsigned long count, void *data)
-{
-	cpumask_t *mask = (cpumask_t *)data;
-	unsigned long full_count = count, err;
-	cpumask_t new_value;
-
-	err = cpumask_parse(buffer, count, new_value);
-	if (err)
-		return err;
-
-	*mask = new_value;
-
-#ifdef CONFIG_PPC_ISERIES
-	{
-		unsigned i;
-		for (i=0; i<NR_CPUS; ++i) {
-			if ( paca[i].prof_buffer && cpu_isset(i, new_value) )
-				paca[i].prof_enabled = 1;
-			else
-				paca[i].prof_enabled = 0;
-		}
-	}
-#endif
-
-	return full_count;
-}
-
-#define MAX_NAMELEN 10
-
-static void register_irq_proc (unsigned int irq)
-{
-	struct proc_dir_entry *entry;
-	char name [MAX_NAMELEN];
-
-	if (!root_irq_dir || (irq_desc[irq].handler == NULL) || irq_dir[irq])
-		return;
-
-	memset(name, 0, MAX_NAMELEN);
-	sprintf(name, "%d", irq);
-
-	/* create /proc/irq/1234 */
-	irq_dir[irq] = proc_mkdir(name, root_irq_dir);
-
-	/* create /proc/irq/1234/smp_affinity */
-	entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
-
-	if (entry) {
-		entry->nlink = 1;
-		entry->data = (void *)(long)irq;
-		entry->read_proc = irq_affinity_read_proc;
-		entry->write_proc = irq_affinity_write_proc;
-	}
-
-	smp_affinity_entry[irq] = entry;
-}
-
-unsigned long prof_cpu_mask = -1;
-
-void init_irq_proc (void)
-{
-	struct proc_dir_entry *entry;
-	int i;
-
-	/* create /proc/irq */
-	root_irq_dir = proc_mkdir("irq", NULL);
-
-	/* create /proc/irq/prof_cpu_mask */
-	entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
-
-	if (!entry)
-		return;
-
-	entry->nlink = 1;
-	entry->data = (void *)&prof_cpu_mask;
-	entry->read_proc = prof_cpu_mask_read_proc;
-	entry->write_proc = prof_cpu_mask_write_proc;
-
-	/*
-	 * Create entries for all existing IRQs.
-	 */
-	for_each_irq(i) {
-		if (get_irq_desc(i)->handler == NULL)
-			continue;
-		register_irq_proc(i);
-	}
-}
-
 irqreturn_t no_action(int irq, void *dev, struct pt_regs *regs)
 {
 	return IRQ_NONE;
@@ -1014,3 +545,4 @@
 
 #endif /* CONFIG_IRQSTACKS */
 
+struct hw_interrupt_type no_irq_type;
diff -urN vpP7/arch/ppc64/kernel/misc.S vpP7-ppc/arch/ppc64/kernel/misc.S
--- vpP7/arch/ppc64/kernel/misc.S	2004-08-17 15:22:34.000000000 -0400
+++ vpP7-ppc/arch/ppc64/kernel/misc.S	2004-08-23 17:27:11.000000000 -0400
@@ -120,7 +120,7 @@
 	std	r0,16(r1)
 	stdu	r1,THREAD_SIZE-112(r6)
 	mr	r1,r6
-	bl	.handle_irq_event
+	bl	.generic_handle_IRQ_event
 	ld	r1,0(r1)
 	ld	r0,16(r1)
 	mtlr	r0
@@ -600,6 +600,35 @@
 	ld	r30,-16(r1)
 	blr
 
+#ifdef CONFIG_LATENCY_TRACE
+
+_GLOBAL(_mcount)
+	ld	r5, 0(r1)
+	mflr	r3
+	stdu	r1, -112(r1)
+	ld	r4, 16(r5)
+	std	r3, 128(r1)
+
+	// Don't call do_mcount if we haven't relocated to
+	// 0xc000000000000000 yet.  This assumes that the ordinary
+	// load address is below 0x8000000000000000.
+
+	lis	r6, 0x8000
+	rldicr	r6, r6, 32, 31
+	and.	r0, r3, r6
+	
+	beq-	mcount_out
+	bl	.do_mcount
+mcount_out:
+
+	ld	r0, 128(r1)
+	mtlr	r0
+
+	addi	r1, r1, 112
+	blr
+
+#endif
+
 #ifdef CONFIG_PPC_ISERIES	/* hack hack hack */
 #define ppc_rtas	sys_ni_syscall
 #endif
diff -urN vpP7/arch/ppc64/kernel/open_pic.c vpP7-ppc/arch/ppc64/kernel/open_pic.c
--- vpP7/arch/ppc64/kernel/open_pic.c	2004-08-17 15:22:34.000000000 -0400
+++ vpP7-ppc/arch/ppc64/kernel/open_pic.c	2004-08-23 16:44:16.000000000 -0400
@@ -78,6 +78,12 @@
 
 OpenPIC_SourcePtr ISU[OPENPIC_MAX_ISU];
 
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+static void openpic_ack_irq(unsigned int irq);
+#else
+#define openpic_ack_irq NULL
+#endif
+
 static void openpic_end_irq(unsigned int irq_nr);
 static void openpic_set_affinity(unsigned int irq_nr, cpumask_t cpumask);
 
@@ -87,7 +93,7 @@
 	NULL,
 	openpic_enable_irq,
 	openpic_disable_irq,
-	NULL,
+	openpic_ack_irq,
 	openpic_end_irq,
 	openpic_set_affinity
 };
@@ -440,7 +446,7 @@
 
 	if (naca->interrupt_controller == IC_OPEN_PIC) {
 		/* Initialize the cascade */
-		if (request_irq(NUM_ISA_INTERRUPTS, no_action, SA_INTERRUPT,
+		if (request_irq(NUM_ISA_INTERRUPTS, no_action, SA_INTERRUPT | SA_NODELAY,
 				"82c59 cascade", NULL))
 			printk(KERN_ERR "Unable to get OpenPIC IRQ 0 for cascade\n");
 		i8259_init();
@@ -641,13 +647,13 @@
 		return;
 
 	/* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */
-	request_irq(openpic_vec_ipi, openpic_ipi_action, SA_INTERRUPT,
+	request_irq(openpic_vec_ipi, openpic_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		    "IPI0 (call function)", NULL);
-	request_irq(openpic_vec_ipi+1, openpic_ipi_action, SA_INTERRUPT,
+	request_irq(openpic_vec_ipi+1, openpic_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		   "IPI1 (reschedule)", NULL);
-	request_irq(openpic_vec_ipi+2, openpic_ipi_action, SA_INTERRUPT,
+	request_irq(openpic_vec_ipi+2, openpic_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		   "IPI2 (unused)", NULL);
-	request_irq(openpic_vec_ipi+3, openpic_ipi_action, SA_INTERRUPT,
+	request_irq(openpic_vec_ipi+3, openpic_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		   "IPI3 (debugger break)", NULL);
 
 	for ( i = 0; i < OPENPIC_NUM_IPI ; i++ )
@@ -834,11 +840,28 @@
 }
 #endif
 
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+
+static void openpic_ack_irq(unsigned int irq_nr)
+{
+	openpic_disable_irq(irq_nr);
+	openpic_eoi();
+}
+
+static void openpic_end_irq(unsigned int irq_nr)
+{
+	openpic_enable_irq(irq_nr);
+}
+
+#else
+
 static void openpic_end_irq(unsigned int irq_nr)
 {
 	openpic_eoi();
 }
 
+#endif
+
 static void openpic_set_affinity(unsigned int irq_nr, cpumask_t cpumask)
 {
 	cpumask_t tmp;
diff -urN vpP7/arch/ppc64/kernel/open_pic_u3.c vpP7-ppc/arch/ppc64/kernel/open_pic_u3.c
--- vpP7/arch/ppc64/kernel/open_pic_u3.c	2004-06-16 01:18:37.000000000 -0400
+++ vpP7-ppc/arch/ppc64/kernel/open_pic_u3.c	2004-08-23 16:43:03.000000000 -0400
@@ -251,11 +251,30 @@
 				 (sense ? OPENPIC_SENSE_LEVEL : 0));
 }
 
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+
+static void openpic2_ack_irq(unsigned int irq_nr)
+{
+	openpic2_disable_irq(irq_nr);
+	openpic2_eoi();
+}
+
+static void openpic2_end_irq(unsigned int irq_nr)
+{
+	openpic2_enable_irq(irq_nr);
+}
+
+#else
+
+#define openpic2_ack_irq NULL
+
 static void openpic2_end_irq(unsigned int irq_nr)
 {
 	openpic2_eoi();
 }
 
+#endif
+
 int openpic2_get_irq(struct pt_regs *regs)
 {
 	int irq = openpic2_irq();
@@ -271,7 +290,7 @@
 	NULL,
 	openpic2_enable_irq,
 	openpic2_disable_irq,
-	NULL,
+	openpic2_ack_irq,
 	openpic2_end_irq,
 };
 
diff -urN vpP7/arch/ppc64/kernel/pmac_setup.c vpP7-ppc/arch/ppc64/kernel/pmac_setup.c
--- vpP7/arch/ppc64/kernel/pmac_setup.c	2004-06-16 01:18:58.000000000 -0400
+++ vpP7-ppc/arch/ppc64/kernel/pmac_setup.c	2004-08-23 13:57:14.000000000 -0400
@@ -409,8 +409,8 @@
  */
 static int __init pmac_irq_cascade_init(void)
 {
-	if (request_irq(pmac_cascade_irq, pmac_u3_do_cascade, 0,
-			"U3->K2 Cascade", NULL))
+	if (request_irq(pmac_cascade_irq, pmac_u3_do_cascade,
+	                SA_NODELAY | SA_INTERRUPT, "U3->K2 Cascade", NULL))
 		printk(KERN_ERR "Unable to get OpenPIC IRQ for cascade\n");
 	return 0;
 }
diff -urN vpP7/arch/ppc64/kernel/xics.c vpP7-ppc/arch/ppc64/kernel/xics.c
--- vpP7/arch/ppc64/kernel/xics.c	2004-08-17 15:22:34.000000000 -0400
+++ vpP7-ppc/arch/ppc64/kernel/xics.c	2004-08-23 13:57:14.000000000 -0400
@@ -572,7 +572,7 @@
 	if (naca->interrupt_controller == IC_PPC_XIC &&
 	    xics_irq_8259_cascade != -1) {
 		if (request_irq(irq_offset_up(xics_irq_8259_cascade),
-				no_action, 0, "8259 cascade", NULL))
+				no_action, SA_NODELAY, "8259 cascade", NULL))
 			printk(KERN_ERR "xics_setup_i8259: couldn't get 8259 "
 					"cascade\n");
 		i8259_init();
@@ -587,7 +587,7 @@
 	virt_irq_to_real_map[XICS_IPI] = XICS_IPI;
 
 	/* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */
-	request_irq(irq_offset_up(XICS_IPI), xics_ipi_action, SA_INTERRUPT,
+	request_irq(irq_offset_up(XICS_IPI), xics_ipi_action, SA_INTERRUPT | SA_NODELAY,
 		    "IPI", NULL);
 	get_irq_desc(irq_offset_up(XICS_IPI))->status |= IRQ_PER_CPU;
 }
diff -urN vpP7/include/asm-i386/hw_irq.h vpP7-ppc/include/asm-i386/hw_irq.h
--- vpP7/include/asm-i386/hw_irq.h	2004-08-23 13:41:28.000000000 -0400
+++ vpP7-ppc/include/asm-i386/hw_irq.h	2004-08-23 13:57:14.000000000 -0400
@@ -54,7 +54,6 @@
 void init_8259A(int aeoi);
 void FASTCALL(send_IPI_self(int vector));
 void init_VISWS_APIC_irqs(void);
-extern void init_hardirqs(void);
 void setup_IO_APIC(void);
 void disable_IO_APIC(void);
 void print_IO_APIC(void);
diff -urN vpP7/include/asm-ppc/hardirq.h vpP7-ppc/include/asm-ppc/hardirq.h
--- vpP7/include/asm-ppc/hardirq.h	2004-06-16 01:18:37.000000000 -0400
+++ vpP7-ppc/include/asm-ppc/hardirq.h	2004-08-23 13:57:14.000000000 -0400
@@ -5,7 +5,7 @@
 #include <linux/config.h>
 #include <linux/cache.h>
 #include <linux/smp_lock.h>
-#include <asm/irq.h>
+#include <linux/irq.h>
 
 /* The __last_jiffy_stamp field is needed to ensure that no decrementer
  * interrupt is lost on SMP machines. Since on most CPUs it is in the same
@@ -71,15 +71,11 @@
  * Are we doing bottom half or hardware interrupt processing?
  * Are we in a softirq context? Interrupt context?
  */
-#define in_irq()		(hardirq_count())
-#define in_softirq()		(softirq_count())
+#define in_irq()		(hardirq_count() || (current->flags & PF_HARDIRQ))
+#define in_softirq()		(softirq_count() || (current->flags & PF_SOFTIRQ))
 #define in_interrupt()		(irq_count())
 
-
-#define hardirq_trylock()	(!in_interrupt())
-#define hardirq_endlock()	do { } while (0)
-
-#define irq_enter()		(preempt_count() += HARDIRQ_OFFSET)
+#define irq_enter()		(add_preempt_count(HARDIRQ_OFFSET))
 
 #ifdef CONFIG_PREEMPT
 # define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != kernel_locked())
@@ -94,17 +90,41 @@
 
 #define irq_exit()							\
 do {									\
-	preempt_count() -= IRQ_EXIT_OFFSET;				\
+	sub_preempt_count(IRQ_EXIT_OFFSET);				\
 	if (!in_interrupt() && softirq_pending(smp_processor_id()))	\
 		do_softirq();						\
 	preempt_enable_no_resched();					\
 } while (0)
 
-#ifndef CONFIG_SMP
-# define synchronize_irq(irq)	barrier()
-#else
-  extern void synchronize_irq(unsigned int irq);
-#endif /* CONFIG_SMP */
+static inline void synchronize_irq(unsigned int irq)
+{
+	generic_synchronize_irq(irq);
+}
+
+static inline void free_irq(unsigned int irq, void *dev_id)
+{
+	generic_free_irq(irq, dev_id);
+}
+
+static inline void disable_irq_nosync(unsigned int irq)
+{
+	generic_disable_irq_nosync(irq);
+}
+
+static inline void disable_irq(unsigned int irq)
+{
+	generic_disable_irq(irq);
+}
+
+static inline void enable_irq(unsigned int irq)
+{
+	generic_enable_irq(irq);
+}
+
+static inline int setup_irq(unsigned int irq, struct irqaction *action)
+{
+	return generic_setup_irq(irq, action);
+}
 
 #endif /* __ASM_HARDIRQ_H */
 #endif /* __KERNEL__ */
diff -urN vpP7/include/asm-ppc/irq.h vpP7-ppc/include/asm-ppc/irq.h
--- vpP7/include/asm-ppc/irq.h	2004-08-17 15:22:36.000000000 -0400
+++ vpP7-ppc/include/asm-ppc/irq.h	2004-08-23 13:57:14.000000000 -0400
@@ -6,10 +6,6 @@
 #include <asm/machdep.h>		/* ppc_md */
 #include <asm/atomic.h>
 
-extern void disable_irq(unsigned int);
-extern void disable_irq_nosync(unsigned int);
-extern void enable_irq(unsigned int);
-
 /*
  * These constants are used for passing information about interrupt
  * signal polarity and level/edge sensing to the low-level PIC chip
@@ -324,7 +320,6 @@
 
 struct irqaction;
 struct pt_regs;
-int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
 
 #endif /* _ASM_IRQ_H */
 #endif /* __KERNEL__ */
diff -urN vpP7/include/asm-ppc/signal.h vpP7-ppc/include/asm-ppc/signal.h
--- vpP7/include/asm-ppc/signal.h	2004-08-17 15:22:36.000000000 -0400
+++ vpP7-ppc/include/asm-ppc/signal.h	2004-08-23 13:57:14.000000000 -0400
@@ -111,6 +111,7 @@
 #define SA_PROBE		SA_ONESHOT
 #define SA_SAMPLE_RANDOM	SA_RESTART
 #define SA_SHIRQ		0x04000000
+#define SA_NODELAY              0x02000000
 #endif /* __KERNEL__ */
 
 #define SIG_BLOCK          0	/* for blocking signals */
diff -urN vpP7/include/asm-ppc64/hardirq.h vpP7-ppc/include/asm-ppc64/hardirq.h
--- vpP7/include/asm-ppc64/hardirq.h	2004-08-17 15:22:36.000000000 -0400
+++ vpP7-ppc/include/asm-ppc64/hardirq.h	2004-08-23 13:57:14.000000000 -0400
@@ -12,6 +12,7 @@
 #include <linux/config.h>
 #include <linux/cache.h>
 #include <linux/preempt.h>
+#include <linux/irq.h>
 
 typedef struct {
 	unsigned int __softirq_pending;
@@ -70,15 +71,11 @@
  * Are we doing bottom half or hardware interrupt processing?
  * Are we in a softirq context? Interrupt context?
  */
-#define in_irq()		(hardirq_count())
-#define in_softirq()		(softirq_count())
+#define in_irq()		(hardirq_count() || (current->flags & PF_HARDIRQ))
+#define in_softirq()		(softirq_count() || (current->flags & PF_SOFTIRQ))
 #define in_interrupt()		(irq_count())
 
-
-#define hardirq_trylock()	(!in_interrupt())
-#define hardirq_endlock()	do { } while (0)
-
-#define irq_enter()		(preempt_count() += HARDIRQ_OFFSET)
+#define irq_enter()		(add_preempt_count(HARDIRQ_OFFSET))
 
 #ifdef CONFIG_PREEMPT
 # define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != kernel_locked())
@@ -89,20 +86,44 @@
 # define preemptible()	0
 # define IRQ_EXIT_OFFSET HARDIRQ_OFFSET
 #endif
+
 #define irq_exit()							\
 do {									\
-		preempt_count() -= IRQ_EXIT_OFFSET;			\
-		if (!in_interrupt() && softirq_pending(smp_processor_id())) \
-			do_softirq();					\
-		preempt_enable_no_resched();				\
+	sub_preempt_count(IRQ_EXIT_OFFSET);				\
+	if (!in_interrupt() && softirq_pending(smp_processor_id()))	\
+		do_softirq();						\
+	preempt_enable_no_resched();					\
 } while (0)
 
-#ifndef CONFIG_SMP
-# define synchronize_irq(irq)	barrier()
-#else
-  extern void synchronize_irq(unsigned int irq);
-#endif /* CONFIG_SMP */
+static inline void synchronize_irq(unsigned int irq)
+{
+	generic_synchronize_irq(irq);
+}
+
+static inline void free_irq(unsigned int irq, void *dev_id)
+{
+	generic_free_irq(irq, dev_id);
+}
+
+static inline void disable_irq_nosync(unsigned int irq)
+{
+	generic_disable_irq_nosync(irq);
+}
+
+static inline void disable_irq(unsigned int irq)
+{
+	generic_disable_irq(irq);
+}
+
+static inline void enable_irq(unsigned int irq)
+{
+	generic_enable_irq(irq);
+}
+
+static inline int setup_irq(unsigned int irq, struct irqaction *action)
+{
+	return generic_setup_irq(irq, action);
+}
 
-#endif /* __KERNEL__ */
-	
 #endif /* __ASM_HARDIRQ_H */
+#endif /* __KERNEL__ */
diff -urN vpP7/include/asm-ppc64/irq.h vpP7-ppc/include/asm-ppc64/irq.h
--- vpP7/include/asm-ppc64/irq.h	2004-08-17 15:22:36.000000000 -0400
+++ vpP7-ppc/include/asm-ppc64/irq.h	2004-08-23 15:42:48.000000000 -0400
@@ -17,10 +17,6 @@
  */
 #define NR_IRQS		512
 
-extern void disable_irq(unsigned int);
-extern void disable_irq_nosync(unsigned int);
-extern void enable_irq(unsigned int);
-
 /* this number is used when no interrupt has been assigned */
 #define NO_IRQ			(-1)
 
@@ -80,7 +76,6 @@
 
 struct irqaction;
 struct pt_regs;
-int handle_irq_event(int, struct pt_regs *, struct irqaction *);
 
 #ifdef CONFIG_IRQSTACKS
 /*
diff -urN vpP7/include/asm-ppc64/signal.h vpP7-ppc/include/asm-ppc64/signal.h
--- vpP7/include/asm-ppc64/signal.h	2004-08-17 15:22:36.000000000 -0400
+++ vpP7-ppc/include/asm-ppc64/signal.h	2004-08-23 13:57:14.000000000 -0400
@@ -108,6 +108,7 @@
 #define SA_PROBE		SA_ONESHOT
 #define SA_SAMPLE_RANDOM	SA_RESTART
 #define SA_SHIRQ		0x04000000
+#define SA_NODELAY              0x02000000
 #endif
 
 #define SIG_BLOCK          0	/* for blocking signals */
diff -urN vpP7/include/linux/interrupt.h vpP7-ppc/include/linux/interrupt.h
--- vpP7/include/linux/interrupt.h	2004-08-23 13:41:28.000000000 -0400
+++ vpP7-ppc/include/linux/interrupt.h	2004-08-23 13:57:14.000000000 -0400
@@ -95,7 +95,6 @@
 	void	*data;
 };
 
-extern void do_hardirq(irq_desc_t *desc);
 asmlinkage void do_softirq(void);
 extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data);
 extern void softirq_init(void);
diff -urN vpP7/include/linux/irq.h vpP7-ppc/include/linux/irq.h
--- vpP7/include/linux/irq.h	2004-08-23 13:41:28.000000000 -0400
+++ vpP7-ppc/include/linux/irq.h	2004-08-23 13:57:14.000000000 -0400
@@ -83,6 +83,8 @@
 extern void generic_disable_irq(unsigned int irq);
 extern void generic_enable_irq(unsigned int irq);
 extern void generic_note_interrupt(int irq, irq_desc_t *desc, int action_ret);
+extern void do_hardirq(irq_desc_t *desc);
+extern void init_hardirqs(void);
 
 extern hw_irq_controller no_irq_type;  /* needed in every arch ? */
 
diff -urN vpP7/include/linux/kernel.h vpP7-ppc/include/linux/kernel.h
--- vpP7/include/linux/kernel.h	2004-08-23 13:41:28.000000000 -0400
+++ vpP7-ppc/include/linux/kernel.h	2004-08-23 13:57:14.000000000 -0400
@@ -48,7 +48,10 @@
 #ifdef CONFIG_PREEMPT_VOLUNTARY
 extern int voluntary_resched(void);
 #else
-# define voluntary_resched() 0
+static inline int voluntary_resched(void)
+{
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
diff -urN vpP7/include/linux/sched.h vpP7-ppc/include/linux/sched.h
--- vpP7/include/linux/sched.h	2004-08-23 13:41:28.000000000 -0400
+++ vpP7-ppc/include/linux/sched.h	2004-08-23 13:57:14.000000000 -0400
@@ -1114,8 +1114,6 @@
  * submitted upstream will of course use need_resched()/cond_resched().
  */
 
-extern int voluntary_resched(void);
-
 static inline int voluntary_need_resched(void)
 {
 	if (voluntary_preemption >= 1)
@@ -1136,9 +1134,15 @@
 }
 
 #else
-# define voluntary_resched() 0
-# define voluntary_resched_lock(lock) 0
-# define voluntary_need_resched() 0
+static inline int voluntary_resched_lock(spinlock_t *lock)
+{
+	return 0;
+}
+
+static inline int voluntary_need_resched(void)
+{
+	return 0;
+}
 #endif
 
 /* Reevaluate whether the task has signals pending delivery.
diff -urN vpP7/init/main.c vpP7-ppc/init/main.c
--- vpP7/init/main.c	2004-08-23 13:41:28.000000000 -0400
+++ vpP7-ppc/init/main.c	2004-08-23 13:57:14.000000000 -0400
@@ -397,9 +397,9 @@
 
 static void noinline rest_init(void)
 {
+	system_state = SYSTEM_BOOTING_SCHEDULER_OK;
 	kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND);
 	numa_default_policy();
-	system_state = SYSTEM_BOOTING_SCHEDULER_OK;
 	unlock_kernel();
  	cpu_idle();
 } 
@@ -669,6 +669,8 @@
 	smp_init();
 	sched_init_smp();
 
+	init_hardirqs();
+
 	/*
 	 * Do this before initcalls, because some drivers want to access
 	 * firmware files.
diff -urN vpP7/kernel/hardirq.c vpP7-ppc/kernel/hardirq.c
--- vpP7/kernel/hardirq.c	2004-08-23 13:41:28.000000000 -0400
+++ vpP7-ppc/kernel/hardirq.c	2004-08-23 13:57:14.000000000 -0400
@@ -11,6 +11,7 @@
 #include <linux/mm.h>
 #include <linux/kallsyms.h>
 #include <linux/proc_fs.h>
+#include <linux/irq.h>
 #include <asm/uaccess.h>
 
 extern struct irq_desc irq_desc[NR_IRQS];
@@ -31,9 +32,8 @@
 	if (voluntary_preemption < 3 || (desc->status & IRQ_NODELAY))
 		return 0;
 
-	BUG_ON(!desc->thread);
 	BUG_ON(!irqs_disabled());
-	if (desc->thread->state != TASK_RUNNING)
+	if (desc->thread && desc->thread->state != TASK_RUNNING)
 		wake_up_process(desc->thread);
 
 	return 1;
@@ -369,7 +369,10 @@
 	if (!shared) {
 		desc->depth = 0;
 		desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING | IRQ_INPROGRESS);
-		desc->handler->startup(irq);
+		if (desc->handler->startup)
+			desc->handler->startup(irq);
+		else
+			desc->handler->enable(irq);
 	}
 	spin_unlock_irqrestore(&desc->lock,flags);
 
@@ -420,7 +423,10 @@
 			*pp = action->next;
 			if (!desc->action) {
 				desc->status |= IRQ_DISABLED;
-				desc->handler->shutdown(irq);
+				if (desc->handler->shutdown)
+					desc->handler->shutdown(irq);
+				else
+					desc->handler->disable(irq);
 			}
 			recalculate_desc_flags(desc);
 			spin_unlock_irqrestore(&desc->lock,flags);
@@ -480,9 +486,11 @@
 	return 0;
 }
 
+static int ok_to_create_irq_threads;
+
 static int start_irq_thread(int irq, struct irq_desc *desc)
 {
-	if (desc->thread)
+	if (desc->thread || !ok_to_create_irq_threads)
 		return 0;
 
 	printk("requesting new irq thread for IRQ%d...\n", irq);
@@ -492,9 +500,31 @@
 		return -ENOMEM;
 	}
 
+	// An interrupt may have come in before the thread pointer was
+	// stored in desc->thread; make sure the thread gets woken up in
+	// such a case.
+	
+	smp_mb();
+	
+	if (desc->status & IRQ_INPROGRESS)
+		wake_up_process(desc->thread);
+	
 	return 0;
 }
 
+void init_hardirqs(void)
+{	
+	int i;
+	ok_to_create_irq_threads = 1;
+
+	for (i = 0; i < NR_IRQS; i++) {
+		irq_desc_t *desc = irq_desc + i;
+		
+		if (desc->action && !(desc->status & IRQ_NODELAY))
+			start_irq_thread(i, desc);
+	}
+}
+
 #ifdef CONFIG_SMP
 
 static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
diff -urN vpP7/kernel/latency.c vpP7-ppc/kernel/latency.c
--- vpP7/kernel/latency.c	2004-08-23 13:41:28.000000000 -0400
+++ vpP7-ppc/kernel/latency.c	2004-08-23 14:06:38.000000000 -0400
@@ -16,6 +16,7 @@
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
+#include <asm/time.h>
 
 unsigned long preempt_thresh;
 unsigned long preempt_max_latency;
@@ -100,6 +101,8 @@
 	___trace(eip, parent_eip);
 }
 
+#ifdef CONFIG_X86
+
 void notrace mcount(void)
 {
 	MCOUNT_HEAD
@@ -112,6 +115,22 @@
 
 EXPORT_SYMBOL(mcount);
 
+#else
+
+#ifdef CONFIG_PPC
+void _mcount(void);
+EXPORT_SYMBOL(_mcount);
+#else
+#error What is mcount called?
+#endif
+
+void notrace do_mcount(void *func, void *called_from)
+{
+	___trace((unsigned long)func, (unsigned long)called_from);
+}
+
+#endif
+
 static void notrace print_name(struct seq_file *m, unsigned long eip)
 {
 	char namebuf[KSYM_NAME_LEN+1];
@@ -142,7 +161,13 @@
 
 static unsigned long notrace cycles_to_usecs(cycles_t delta)
 {
+#ifdef CONFIG_X86
 	do_div(delta, cpu_khz/1000);
+#elif defined(CONFIG_PPC)
+	delta = mulhwu(tb_to_us, delta);
+#else
+	#error Implement cycles_to_usecs.
+#endif
 
 	return (unsigned long) delta;
 }
@@ -248,18 +273,15 @@
 #endif
 	unsigned long parent_eip = (unsigned long)__builtin_return_address(1);
 	unsigned long latency;
-	cycles_t delta;
 
 	atomic_inc(&tr->disabled);
-	delta = get_cycles() - tr->preempt_timestamp;
-	do_div(delta, cpu_khz/1024);
-	latency = (unsigned long) delta;
+	latency = cycles_to_usecs(get_cycles() - tr->preempt_timestamp);
 
 	if (preempt_thresh) {
 		if (latency < preempt_thresh)
 			goto out;
 	} else {
-		if (latency < preempt_max_latency)
+		if (latency <= preempt_max_latency)
 			goto out;
 	}
 
--- vpP7/kernel/sysctl.c	2004-08-23 17:39:58.000000000 -0400
+++ vpP7-ppc/kernel/sysctl.c	2004-08-23 17:56:41.000000000 -0400
@@ -285,7 +285,7 @@
 		.data		= &preempt_max_latency,
 		.maxlen		= sizeof(unsigned long),
 		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
+		.proc_handler	= &proc_doulongvec_minmax,
 	},
 #ifdef CONFIG_LATENCY_TRACE
 	{

^ permalink raw reply	[flat|nested] 122+ messages in thread
* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
@ 2004-08-30 19:13 Mark_H_Johnson
  2004-08-30 19:21 ` Ingo Molnar
                   ` (2 more replies)
  0 siblings, 3 replies; 122+ messages in thread
From: Mark_H_Johnson @ 2004-08-30 19:13 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: K.R. Foley, linux-kernel, Felipe Alfaro Solana, Daniel Schmitt,
	Lee Revell

>i've uploaded -Q5 to:
> [snip the rest...]

Thanks.

This appears to be the first 2.6.x kernel I've run that has results
comparable to 2.4.x kernels with low latency patches and kernel preemption.
The few remaining symptoms I see include:

 - a few long (> 1 msec) delays in the real time CPU loop (no system calls)
 - varying time to complete the write system call (for audio) - much
different than 2.4
 - a couple latency traces (> 700 usec) in the network driver

For reference, these tests were performed on the following SMP system:
  Dual 866 Mhz Pentium III
  512 Mbyte memory
  IDE system disk (DMA enabled)
The basic test is Benno's latency test
(http://www.gardena.net/benno/linux/audio) with some slight modifications
to the tests to keep the second CPU busy (non real time CPU burner) and to
add network I/O tests. The 2.4 tests were run with 2.4.20, the 2.6 tests
were run with 2.4.9-rc1-Q5. On 2.6, voluntary_preemption,
kernel_preemption, hardirq_preemption, and softirq_preemption are all 1. I
also set
  /sys/block/hda/queue/max_sectors_kb = 32
  /sys/block/hda/queue/read_ahead_kb = 32
  /proc/sys/net/core/netdev_max_backlog = 8
and the audio driver was set to be non-threaded.

BASIC RESULTS
=============
Comparison of results between 2.6.x and 2.4.x; values in milliseconds.
Nominal values for the write operation is 1.45 msec; the CPU loop is 1.16
msec.

          Max CPU Delta     Max Write Delta
Test     2.4.x     2.6.x    2.4.x     2.6.x
X11       0.10      0.16     0.05      0.65
/proc     0.07      0.17     0.05      0.65
net out   0.15      0.19     0.05      0.75
net in    0.17      0.23     0.05      0.95
dsk wrt   0.49      0.18     0.25      1.05
dsk copy  2.48      0.68     2.25      1.25
disk rd   3.03      1.61     2.75      1.35

LONG DELAYS
===========

Note I still see over 110% worst case overhead on a max priority real time
CPU task (no system calls) when doing heavy disk I/O on 2.6. It is much
better than 2.4, but still disturbing. What I would hope would happen on a
dual CPU system like mine, is that the real time task tends to be on one
CPU and the other system activity would tend to stay on the other CPU.
However, the results do not seem to indicate that behavior.

VARYING SYSTEM CALL TIMES
=========================

In 2.4, it appears that the duration of the write system call is basically
fixed and dependent on the duration of the audio fragment. In 2.6, this
behavior is now different. If I look at the chart in detail, it appears the
system is queueing up several write operations during the first few seconds
of testing. You can see this by consistently low elapsed times for the
write system call. Then the elapsed time for the write bounces up / down in
a sawtooth pattern over a 1 msec range. Could someone explain the cause of
this new behavior and if there is a setting to restore the old behavior? I
am concerned that this queueing adds latency to audio operations (when
trying to synchronize audio with other real time behavior).

LONG NETWORK LATENCIES
======================

In about 25 minutes of heavy testing, I had two latency traces with
/proc/sys/kernel/preempt_max_latency set to 700. They had the same start /
end location with the long delay as follows:
  730 us, entries: 361
  ...
  started at rtl8139_poll+0x3c/0x160
  ended at   rtl8139_poll+0x100/0x160
  00000001 0.000ms (+0.000ms): rtl8139_poll (net_rx_action)
  00000001 0.140ms (+0.140ms): rtl8139_rx (rtl8139_poll)
  00000001 0.556ms (+0.416ms): alloc_skb (rtl8139_rx)
  ... remaining items all > +0.005ms ...

  731 us, entries: 360
  ...
  started at rtl8139_poll+0x3c/0x160
  ended at   rtl8139_poll+0x100/0x160
  00000001 0.000ms (+0.000ms): rtl8139_poll (net_rx_action)
  00000001 0.000ms (+0.000ms): rtl8139_rx (rtl8139_poll)
  00000001 0.002ms (+0.001ms): alloc_skb (rtl8139_rx)
  00000001 0.141ms (+0.139ms): kmem_cache_alloc (alloc_skb)
  00000001 0.211ms (+0.070ms): __kmalloc (alloc_skb)
  00000001 0.496ms (+0.284ms): eth_type_trans (rtl8139_rx)
  00000001 0.565ms (+0.068ms): netif_receive_skb (rtl8139_rx)
  ... remaining items all > +0.005ms ...

Still much better than my previous results (before setting
netdev_max_backlog).

I will be running some additional tests
 - reducing preempt_max_latency
 - running with softirq_preemption and hardirq_preemption=0
to see if these uncover any further problems.

Thanks again for the good work.
--Mark H Johnson
  <mailto:Mark_H_Johnson@raytheon.com>


^ permalink raw reply	[flat|nested] 122+ messages in thread
* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
@ 2004-08-30 22:04 Mark_H_Johnson
  2004-08-31  6:31 ` Ingo Molnar
  2004-09-01  7:30 ` Ingo Molnar
  0 siblings, 2 replies; 122+ messages in thread
From: Mark_H_Johnson @ 2004-08-30 22:04 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: K.R. Foley, linux-kernel, Felipe Alfaro Solana, Daniel Schmitt,
	Lee Revell

>regarding this particular latency, could you try the attached patch
>ontop of -Q5? It turns the ->poll() loop into separate, individually
>preemptable iterations instead of one batch of processing. In theory
>this should result in latency being lower regardless of the
>netdev_max_backlog value.

First time - stopped during init script - when trying to start a network
service (automount).
Second time - stopped during init script - did not get the "OK" for
bringing up interface eth0.
Alt-SysRq-P still does not show any data [not sure why].
Alt-SysRq-T captured data - for example, shows dhclient in sys_select ->
__pollwait -> do_select -> process_timeout -> add_wait_queue ->
schedule_timeout -> __mod_timer. Very odd, almost every other task (except
initlog) is stuck in one of:
 - generic_handle_IRQ_event
 - sub_preempt_count
 - do_irqd
 - do_hardirq
and all the tasks I can see have "S" status.
Third time - ditto - back to stopping at automount start up.
Alt-SysRq-T captured data, again everything I could look at in "S" mode.
In all three cases, another system attempting to "ping" the system under
test failed to get any responses.

In all cases, Ctrl-Alt-Del was good enough to get a clean reboot.

This looks like a bad patch; will go back to the last good kernel for
further testing.

--Mark H Johnson
  <mailto:Mark_H_Johnson@raytheon.com>


^ permalink raw reply	[flat|nested] 122+ messages in thread
* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
@ 2004-08-31 12:46 Mark_H_Johnson
  0 siblings, 0 replies; 122+ messages in thread
From: Mark_H_Johnson @ 2004-08-31 12:46 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: K.R. Foley, linux-kernel, Felipe Alfaro Solana, Daniel Schmitt,
	Lee Revell

>in theory the patch is more or less equivalent to setting
>netdev_max_backlog to a value of 1 - could you try that setting too?
>(with the patch unapplied.)

Ugh. That setting is VERY BAD. Just a quick test without
doing anything complex...
  # echo 1 > /proc/sys/net/core/netdev_max_backlog
  # ping dws7
  PING dws7 (192.52.215.17) 56(84) bytes of data.
[so the DNS lookup worked]
  From dws77... (192.52.215.87) icmp_seq=0 Destination Host Unreachable
  From dws77... (192.52.215.87) icmp_seq=1 Destination Host Unreachable
  From dws77... (192.52.215.87) icmp_seq=2 Destination Host Unreachable
  ...
[NOTE - these are plugged into the same 10/100 Ethernet switch]
  # echo 8 > /proc/sys/net/core/netdev_max_backlog
  # ping dws7
  PING dws7 (192.52.215.17) 56(84) bytes of data.
[so the DNS lookup worked]
  From dws77... (192.52.215.87) icmp_seq=0 ttl=64 time=2210 ms
  From dws77... (192.52.215.87) icmp_seq=1 ttl=64 time=1210 ms
  From dws77... (192.52.215.87) icmp_seq=2 ttl=64 time=210 ms
  From dws77... (192.52.215.87) icmp_seq=2 ttl=64 time=0.355 ms
  From dws77... (192.52.215.87) icmp_seq=2 ttl=64 time=0.397 ms
  ...
I tried again with 2, 3, and 4. Two appears to be "way too small" with
a ping of 1000 ms and nominal values of 0.800 ms. Three does not appear
to be good either with nominal values of 0.500 ms. Four has similar
results to eight (8).


--Mark H Johnson
  <mailto:Mark_H_Johnson@raytheon.com>


^ permalink raw reply	[flat|nested] 122+ messages in thread
* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
@ 2004-08-31 15:17 Mark_H_Johnson
  2004-08-31 17:20 ` Lee Revell
  0 siblings, 1 reply; 122+ messages in thread
From: Mark_H_Johnson @ 2004-08-31 15:17 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Mark_H_Johnson, K.R. Foley, linux-kernel, Felipe Alfaro Solana,
	Daniel Schmitt, Lee Revell

>I will be running some additional tests
>- reducing preempt_max_latency
>- running with softirq and hardirq_preemption=0
>to see if these uncover any further problems.

Same system / kernel combination as described previously.

With preempt_max_latency=500, I went from a handful of traces to 63 in
a 25 minute test run. Most traces during the last half of the test while
the disk was active (write, copy, read). I will send a copy of the full
traces separately (not to linux-kernel), but here is a summary of the
information gathered. If someone else wants the full traces, please send
an email separately.

Note - I did not repeat the network poll / number of read cycles
problem since we are already working that one.

Cascade
=======
Occurred five times, with latencies of
 latency: 964 us, entries: 285 (285)
 latency: 964 us, entries: 285 (285)
 latency: 1827 us, entries: 454 (454)
 latency: 1111 us, entries: 318 (318)
 latency: 969 us, entries: 279 (279)

Starts / ends at
 => started at: run_timer_softirq+0x12f/0x2a0
 => ended at:   run_timer_softirq+0x10a/0x2a0

For example:
00000001 0.000ms (+0.000ms): run_timer_softirq (___do_softirq)
00000001 0.000ms (+0.000ms): cascade (run_timer_softirq)
00000001 0.005ms (+0.004ms): cascade (run_timer_softirq)
00000001 0.009ms (+0.004ms): cascade (run_timer_softirq)
00000001 0.013ms (+0.004ms): cascade (run_timer_softirq)
00000001 0.018ms (+0.004ms): cascade (run_timer_softirq)
...
00000001 0.891ms (+0.004ms): cascade (run_timer_softirq)
00000001 0.895ms (+0.004ms): cascade (run_timer_softirq)
00000001 0.896ms (+0.000ms): internal_add_timer (cascade)
00010001 0.899ms (+0.003ms): do_IRQ (run_timer_softirq)
00010002 0.899ms (+0.000ms): mask_and_ack_level_ioapic_irq (do_IRQ)
00010002 0.899ms (+0.000ms): mask_IO_APIC_irq
(mask_and_ack_level_ioapic_irq)
00010003 0.900ms (+0.000ms): __mask_IO_APIC_irq (mask_IO_APIC_irq)
00010003 0.900ms (+0.000ms): __modify_IO_APIC_irq (__mask_IO_APIC_irq)
00010002 0.914ms (+0.013ms): generic_redirect_hardirq (do_IRQ)
...


Long Duration Trace Entries
===========================

Each of these traces had a delay of about 1/2 msec at one step.

#1 - audio driver
 latency: 621 us, entries: 28 (28)
    -----------------
    | task: latencytest/11492, uid:0 nice:0 policy:1 rt_prio:99
    -----------------
 => started at: snd_ensoniq_playback1_prepare+0x74/0x180
 => ended at:   snd_ensoniq_playback1_prepare+0x11d/0x180
=======>
00000001 0.000ms (+0.000ms): snd_ensoniq_playback1_prepare
(snd_pcm_do_prepare)
00000001 0.014ms (+0.014ms): snd_es1371_dac1_rate
(snd_ensoniq_playback1_prepare)
00000001 0.014ms (+0.000ms): snd_es1371_wait_src_ready
(snd_es1371_dac1_rate)
00000001 0.562ms (+0.548ms): snd_es1371_src_read (snd_es1371_dac1_rate)
00000001 0.562ms (+0.000ms): snd_es1371_wait_src_ready
(snd_es1371_src_read)
00000001 0.578ms (+0.015ms): snd_es1371_wait_src_ready
(snd_es1371_src_read)
00000001 0.585ms (+0.006ms): snd_es1371_src_write (snd_es1371_dac1_rate)
00000001 0.585ms (+0.000ms): snd_es1371_wait_src_ready
(snd_es1371_src_write)
00000001 0.601ms (+0.015ms): snd_es1371_src_write (snd_es1371_dac1_rate)
00000001 0.601ms (+0.000ms): snd_es1371_wait_src_ready
(snd_es1371_src_write)
00000001 0.602ms (+0.001ms): snd_es1371_wait_src_ready
(snd_es1371_dac1_rate)
00000001 0.616ms (+0.013ms): smp_apic_timer_interrupt
(snd_ensoniq_playback1_prepare)

or

 latency: 663 us, entries: 41 (41)
    -----------------
    | task: latencytest/11492, uid:0 nice:0 policy:1 rt_prio:99
    -----------------
 => started at: snd_ensoniq_playback1_prepare+0x74/0x180
 => ended at:   snd_ensoniq_playback1_prepare+0x11d/0x180
=======>
00000001 0.000ms (+0.000ms): snd_ensoniq_playback1_prepare
(snd_pcm_do_prepare)
00000001 0.004ms (+0.004ms): snd_es1371_dac1_rate
(snd_ensoniq_playback1_prepare)
00000001 0.005ms (+0.000ms): snd_es1371_wait_src_ready
(snd_es1371_dac1_rate)
00000001 0.006ms (+0.001ms): snd_es1371_src_read (snd_es1371_dac1_rate)
00000001 0.006ms (+0.000ms): snd_es1371_wait_src_ready
(snd_es1371_src_read)
00000001 0.019ms (+0.012ms): snd_es1371_wait_src_ready
(snd_es1371_src_read)
00000001 0.607ms (+0.588ms): snd_es1371_src_write (snd_es1371_dac1_rate)
00000001 0.608ms (+0.000ms): snd_es1371_wait_src_ready
(snd_es1371_src_write)
00000001 0.624ms (+0.016ms): snd_es1371_src_write (snd_es1371_dac1_rate)
00000001 0.624ms (+0.000ms): snd_es1371_wait_src_ready
(snd_es1371_src_write)
00000001 0.626ms (+0.001ms): snd_es1371_wait_src_ready
(snd_es1371_dac1_rate)
00000001 0.639ms (+0.013ms): smp_apic_timer_interrupt
(snd_ensoniq_playback1_prepare)

#2 - Scheduler

preemption latency trace v1.0.2
-------------------------------
 latency: 567 us, entries: 48 (48)
    -----------------
    | task: cpu_burn/9444, uid:0 nice:10 policy:0 rt_prio:0
    -----------------
 => started at: schedule+0x51/0x7b0
 => ended at:   schedule+0x35b/0x7b0
=======>
00000001 0.000ms (+0.000ms): schedule (io_schedule)
00000001 0.001ms (+0.001ms): sched_clock (schedule)
00000002 0.001ms (+0.000ms): deactivate_task (schedule)
00000002 0.002ms (+0.000ms): dequeue_task (deactivate_task)
00000002 0.549ms (+0.546ms): __switch_to (schedule)
00000002 0.550ms (+0.001ms): finish_task_switch (schedule)
00000002 0.550ms (+0.000ms): smp_apic_timer_interrupt (finish_task_switch)
00010002 0.551ms (+0.000ms): profile_tick (smp_apic_timer_interrupt)
00010002 0.551ms (+0.000ms): profile_hook (profile_tick)

[I have a LOT more traces where __switch_to has the big time delay]
but also note...

preemption latency trace v1.0.2
-------------------------------
 latency: 591 us, entries: 62 (62)
    -----------------
    | task: fam/4524, uid:0 nice:0 policy:0 rt_prio:0
    -----------------
 => started at: schedule+0x51/0x7b0
 => ended at:   schedule+0x35b/0x7b0
=======>
00000001 0.000ms (+0.000ms): schedule (io_schedule)
00000001 0.000ms (+0.000ms): sched_clock (schedule)
00000002 0.066ms (+0.066ms): deactivate_task (schedule)
00000002 0.066ms (+0.000ms): dequeue_task (deactivate_task)
00000002 0.475ms (+0.408ms): dequeue_task (schedule)
00000002 0.475ms (+0.000ms): recalc_task_prio (schedule)
00000002 0.475ms (+0.000ms): effective_prio (recalc_task_prio)
00000002 0.475ms (+0.000ms): enqueue_task (schedule)
00000002 0.557ms (+0.081ms): __switch_to (schedule)
00000002 0.558ms (+0.000ms): finish_task_switch (schedule)
00000002 0.558ms (+0.000ms): smp_apic_timer_interrupt (finish_task_switch)
00010002 0.559ms (+0.000ms): profile_tick (smp_apic_timer_interrupt)
where dequeue_task can take a while as well, or this one

preemption latency trace v1.0.2
-------------------------------
 latency: 591 us, entries: 77 (77)
    -----------------
    | task: ksoftirqd/0/3, uid:0 nice:-10 policy:0 rt_prio:0
    -----------------
 => started at: schedule+0x51/0x7b0
 => ended at:   schedule+0x35b/0x7b0
=======>
00000001 0.000ms (+0.000ms): schedule (io_schedule)
00000001 0.000ms (+0.000ms): sched_clock (schedule)
00000002 0.000ms (+0.000ms): deactivate_task (schedule)
00000002 0.000ms (+0.000ms): dequeue_task (deactivate_task)
00000002 0.480ms (+0.479ms): load_balance_newidle (schedule)
00000002 0.514ms (+0.034ms): find_busiest_group (load_balance_newidle)
00000002 0.554ms (+0.039ms): find_next_bit (find_busiest_group)
00000002 0.555ms (+0.001ms): find_next_bit (find_busiest_group)
00000002 0.555ms (+0.000ms): find_busiest_queue (load_balance_newidle)
00000002 0.556ms (+0.000ms): find_next_bit (find_busiest_queue)
00000002 0.557ms (+0.000ms): double_lock_balance (load_balance_newidle)
00000003 0.557ms (+0.000ms): move_tasks (load_balance_newidle)
00000003 0.559ms (+0.002ms): find_next_bit (move_tasks)
00000003 0.560ms (+0.000ms): find_next_bit (move_tasks)
00000003 0.561ms (+0.000ms): find_next_bit (move_tasks)
04000002 0.563ms (+0.002ms): __switch_to (schedule)
04000002 0.564ms (+0.000ms): finish_task_switch (schedule)

where it appears load balancing takes a long time.

#3 - kmap / kunmap

preemption latency trace v1.0.2
-------------------------------
 latency: 602 us, entries: 53 (53)
    -----------------
    | task: cp/11501, uid:0 nice:0 policy:0 rt_prio:0
    -----------------
 => started at: kmap_atomic+0x23/0xe0
 => ended at:   kunmap_atomic+0x7b/0xa0
=======>
00000001 0.000ms (+0.000ms): kmap_atomic (file_read_actor)
00000001 0.000ms (+0.000ms): page_address (file_read_actor)
00000001 0.000ms (+0.000ms): __copy_to_user_ll (file_read_actor)
00000001 0.502ms (+0.501ms): smp_apic_timer_interrupt (__copy_to_user_ll)
00010001 0.502ms (+0.000ms): profile_tick (smp_apic_timer_interrupt)
00010001 0.502ms (+0.000ms): profile_hook (profile_tick)
00010002 0.502ms (+0.000ms): notifier_call_chain (profile_hook)
00010001 0.570ms (+0.068ms): profile_hit (smp_apic_timer_interrupt)
00010001 0.571ms (+0.000ms): update_process_times
(smp_apic_timer_interrupt)
00010001 0.571ms (+0.000ms): update_one_process (update_process_times)
00010001 0.571ms (+0.000ms): run_local_timers (update_process_times)

or

preemption latency trace v1.0.2
-------------------------------
 latency: 615 us, entries: 75 (75)
    -----------------
    | task: cat/11844, uid:0 nice:0 policy:0 rt_prio:0
    -----------------
 => started at: kmap_atomic+0x23/0xe0
 => ended at:   kunmap_atomic+0x7b/0xa0
=======>
00000001 0.000ms (+0.000ms): kmap_atomic (file_read_actor)
00000001 0.000ms (+0.000ms): page_address (file_read_actor)
00000001 0.000ms (+0.000ms): __copy_to_user_ll (file_read_actor)
00000001 0.563ms (+0.562ms): smp_apic_timer_interrupt (__copy_to_user_ll)
00010001 0.563ms (+0.000ms): profile_tick (smp_apic_timer_interrupt)
00010001 0.563ms (+0.000ms): profile_hook (profile_tick)
00010002 0.564ms (+0.000ms): notifier_call_chain (profile_hook)
00010001 0.564ms (+0.000ms): profile_hit (smp_apic_timer_interrupt)
00010001 0.564ms (+0.000ms): update_process_times
(smp_apic_timer_interrupt)
00010001 0.564ms (+0.000ms): update_one_process (update_process_times)

#4 - mmap

preemption latency trace v1.0.2
-------------------------------
 latency: 660 us, entries: 48 (48)
    -----------------
    | task: get_ltrace.sh/12120, uid:0 nice:-20 policy:0 rt_prio:0
    -----------------
 => started at: cond_resched_lock+0x7b/0x140
 => ended at:   exit_mmap+0x168/0x210
=======>
00000001 0.000ms (+0.000ms): touch_preempt_timing (cond_resched_lock)
00000001 0.000ms (+0.000ms): __bitmap_weight (unmap_vmas)
00000001 0.000ms (+0.000ms): vm_acct_memory (exit_mmap)
00000001 0.001ms (+0.000ms): clear_page_tables (exit_mmap)
00010001 0.520ms (+0.518ms): do_IRQ (clear_page_tables)
00010002 0.564ms (+0.044ms): ack_edge_ioapic_irq (do_IRQ)
00010002 0.564ms (+0.000ms): generic_redirect_hardirq (do_IRQ)
00010001 0.564ms (+0.000ms): generic_handle_IRQ_event (do_IRQ)
00010001 0.564ms (+0.000ms): timer_interrupt (generic_handle_IRQ_event)
00010002 0.565ms (+0.001ms): mark_offset_tsc (timer_interrupt)
00010002 0.618ms (+0.052ms): do_timer (timer_interrupt)
00010002 0.618ms (+0.000ms): update_wall_time (do_timer)
00010002 0.618ms (+0.000ms): update_wall_time_one_tick (update_wall_time)
00010002 0.619ms (+0.000ms): generic_note_interrupt (do_IRQ)
00010002 0.619ms (+0.000ms): end_edge_ioapic_irq (do_IRQ)
00000002 0.619ms (+0.000ms): do_softirq (do_IRQ)

#5 - network poll

preemption latency trace v1.0.2
-------------------------------
 latency: 753 us, entries: 371 (371)
    -----------------
    | task: ksoftirqd/1/5, uid:0 nice:-10 policy:0 rt_prio:0
    -----------------
 => started at: rtl8139_poll+0x3c/0x160
 => ended at:   rtl8139_poll+0x100/0x160
=======>
00000001 0.000ms (+0.000ms): rtl8139_poll (net_rx_action)
00000001 0.000ms (+0.000ms): rtl8139_rx (rtl8139_poll)
00000001 0.002ms (+0.001ms): alloc_skb (rtl8139_rx)
00000001 0.002ms (+0.000ms): kmem_cache_alloc (alloc_skb)
00000001 0.002ms (+0.000ms): __kmalloc (alloc_skb)
00000001 0.004ms (+0.002ms): eth_type_trans (rtl8139_rx)
00000001 0.005ms (+0.000ms): netif_receive_skb (rtl8139_rx)
00000002 0.008ms (+0.002ms): packet_rcv_spkt (netif_receive_skb)
00000002 0.008ms (+0.000ms): skb_clone (packet_rcv_spkt)
00000002 0.009ms (+0.000ms): kmem_cache_alloc (skb_clone)
00000002 0.078ms (+0.069ms): memcpy (skb_clone)
00010002 0.498ms (+0.419ms): do_IRQ (skb_clone)
00010003 0.498ms (+0.000ms): ack_edge_ioapic_irq (do_IRQ)
00010003 0.498ms (+0.000ms): generic_redirect_hardirq (do_IRQ)
00010002 0.499ms (+0.000ms): generic_handle_IRQ_event (do_IRQ)
00010002 0.499ms (+0.000ms): timer_interrupt (generic_handle_IRQ_event)
00010003 0.568ms (+0.068ms): mark_offset_tsc (timer_interrupt)
00010003 0.582ms (+0.014ms): do_timer (timer_interrupt)
00010003 0.582ms (+0.000ms): update_wall_time (do_timer)

or

preemption latency trace v1.0.2
-------------------------------
 latency: 752 us, entries: 395 (395)
    -----------------
    | task: ksoftirqd/1/5, uid:0 nice:-10 policy:0 rt_prio:0
    -----------------
 => started at: rtl8139_poll+0x3c/0x160
 => ended at:   rtl8139_poll+0x100/0x160
=======>
00000001 0.000ms (+0.000ms): rtl8139_poll (net_rx_action)
00000001 0.000ms (+0.000ms): rtl8139_rx (rtl8139_poll)
00000001 0.002ms (+0.001ms): alloc_skb (rtl8139_rx)
00000001 0.002ms (+0.000ms): kmem_cache_alloc (alloc_skb)
00000001 0.002ms (+0.000ms): __kmalloc (alloc_skb)
00000001 0.005ms (+0.002ms): eth_type_trans (rtl8139_rx)
00000001 0.146ms (+0.140ms): netif_receive_skb (rtl8139_rx)
00000002 0.566ms (+0.420ms): packet_rcv_spkt (netif_receive_skb)
00000002 0.567ms (+0.000ms): skb_clone (packet_rcv_spkt)
00000002 0.567ms (+0.000ms): kmem_cache_alloc (skb_clone)
00000002 0.568ms (+0.000ms): memcpy (skb_clone)
00000002 0.570ms (+0.001ms): strlcpy (packet_rcv_spkt)

Separately I ran a series of tests with:
  preempt_max_latency=500
  hardirq_preemption=0
  softirq_preemption=0
which should be similar to the configuration I used in 2.4 kernels.
There were > 100 latency traces (my script stops at 100) in the same
25 minute test. In addition to the traces listed above, I had the
following problems.

RT Run Flush
============

preemption latency trace v1.0.2
-------------------------------
 latency: 1592 us, entries: 4000 (6306)
    -----------------
    | task: latencytest/6440, uid:0 nice:0 policy:1 rt_prio:99
    -----------------
 => started at: smp_apic_timer_interrupt+0x43/0x130
 => ended at:   smp_apic_timer_interrupt+0xaa/0x130

...
00000101 0.041ms (+0.001ms): add_entropy_words (extract_entropy)
00000101 0.042ms (+0.000ms): SHATransform (extract_entropy)
00000101 0.042ms (+0.000ms): memcpy (SHATransform)
00000101 0.044ms (+0.001ms): add_entropy_words (extract_entropy)
00000101 0.045ms (+0.000ms): add_entropy_words (extract_entropy)
00000101 0.046ms (+0.001ms): credit_entropy_store (extract_entropy)
00000102 0.047ms (+0.001ms): __wake_up (extract_entropy)
00000103 0.047ms (+0.000ms): __wake_up_common (__wake_up)
00000101 0.048ms (+0.000ms): SHATransform (extract_entropy)
00000101 0.048ms (+0.000ms): memcpy (SHATransform)
00000101 0.050ms (+0.001ms): add_entropy_words (extract_entropy)
00000101 0.050ms (+0.000ms): SHATransform (extract_entropy)
00000101 0.050ms (+0.000ms): memcpy (SHATransform)
00000101 0.052ms (+0.001ms): add_entropy_words (extract_entropy)
00000201 0.053ms (+0.001ms): local_bh_enable (rt_run_flush)
00000101 0.053ms (+0.000ms): cond_resched_all (rt_run_flush)
00000101 0.053ms (+0.000ms): cond_resched_softirq (rt_run_flush)
00000201 0.054ms (+0.000ms): local_bh_enable (rt_run_flush)
00000101 0.054ms (+0.000ms): cond_resched_all (rt_run_flush)
00000101 0.054ms (+0.000ms): cond_resched_softirq (rt_run_flush)
... the last 3 lines repeat over 1000 times and fill
the trace buffer completely ...

The above sequence occurred twice during testing.

Short But Long
==============

preemption latency trace v1.0.2
-------------------------------
 latency: 549 us, entries: 4 (4)
    -----------------
    | task: kblockd/1/11, uid:0 nice:-10 policy:0 rt_prio:0
    -----------------
 => started at: worker_thread+0x22d/0x3a0
 => ended at:   worker_thread+0x2a2/0x3a0
=======>
00000001 0.000ms (+0.000ms): worker_thread (kthread)
00000001 0.000ms (+0.000ms): __wake_up (worker_thread)
00000002 0.549ms (+0.549ms): __wake_up_common (__wake_up)
00000001 0.550ms (+0.000ms): sub_preempt_count (worker_thread)

or

preemption latency trace v1.0.2
-------------------------------
 latency: 551 us, entries: 4 (4)
    -----------------
    | task: kblockd/1/11, uid:0 nice:-10 policy:0 rt_prio:0
    -----------------
 => started at: worker_thread+0x22d/0x3a0
 => ended at:   worker_thread+0x2a2/0x3a0
=======>
00000001 0.000ms (+0.000ms): worker_thread (kthread)
00000001 0.000ms (+0.000ms): __wake_up (worker_thread)
00000002 0.000ms (+0.000ms): __wake_up_common (__wake_up)
00000001 0.552ms (+0.551ms): sub_preempt_count (worker_thread)

or this one apparently preempting the real time task [why??]
Now that I look, the first set of tests preempted the max priority
real time application six (6) times and the second set of tests
preempted the RT application thirty (30) times.

preemption latency trace v1.0.2
-------------------------------
 latency: 566 us, entries: 13 (13)
    -----------------
    | task: latencytest/7959, uid:0 nice:0 policy:1 rt_prio:99
    -----------------
 => started at: do_IRQ+0x19/0x290
 => ended at:   do_IRQ+0x1cf/0x290
=======>
00010000 0.000ms (+0.000ms): do_IRQ (common_interrupt)
00010000 0.000ms (+0.000ms): do_IRQ (<08049b20>)
00010001 0.000ms (+0.000ms): ack_edge_ioapic_irq (do_IRQ)
00010001 0.000ms (+0.000ms): generic_redirect_hardirq (do_IRQ)
00010000 0.000ms (+0.000ms): generic_handle_IRQ_event (do_IRQ)
00010000 0.001ms (+0.000ms): timer_interrupt (generic_handle_IRQ_event)
00010001 0.549ms (+0.548ms): mark_offset_tsc (timer_interrupt)
00010001 0.564ms (+0.014ms): do_timer (timer_interrupt)
00010001 0.564ms (+0.000ms): update_wall_time (do_timer)
00010001 0.564ms (+0.000ms): update_wall_time_one_tick (update_wall_time)
00010001 0.565ms (+0.001ms): generic_note_interrupt (do_IRQ)
00010001 0.566ms (+0.000ms): end_edge_ioapic_irq (do_IRQ)
00000001 0.566ms (+0.000ms): sub_preempt_count (do_IRQ)

Another Long Time
=================

#1 - kmap / kunmap

Similar stop / end locations to above, but a different cause.

preemption latency trace v1.0.2
-------------------------------
 latency: 696 us, entries: 131 (131)
    -----------------
    | task: sleep/8854, uid:0 nice:-20 policy:0 rt_prio:0
    -----------------
 => started at: kmap_atomic+0x23/0xe0
 => ended at:   kunmap_atomic+0x7b/0xa0
=======>
00000001 0.000ms (+0.000ms): kmap_atomic (do_anonymous_page)
00000001 0.000ms (+0.000ms): page_address (do_anonymous_page)
00010001 0.413ms (+0.413ms): do_IRQ (do_anonymous_page)
00010002 0.452ms (+0.038ms): ack_edge_ioapic_irq (do_IRQ)
00010002 0.452ms (+0.000ms): generic_redirect_hardirq (do_IRQ)
00010001 0.454ms (+0.001ms): generic_handle_IRQ_event (do_IRQ)
00010001 0.488ms (+0.034ms): timer_interrupt (generic_handle_IRQ_event)
00010002 0.557ms (+0.069ms): mark_offset_tsc (timer_interrupt)
00010002 0.573ms (+0.015ms): do_timer (timer_interrupt)
00010002 0.574ms (+0.000ms): update_wall_time (do_timer)
00010002 0.574ms (+0.000ms): update_wall_time_one_tick (update_wall_time)

Perhaps the most disturbing finding is that the max priority RT application
can get preempted for a long time, even though there is:
 - only one real time task
 - two CPU's to do work

  --Mark


^ permalink raw reply	[flat|nested] 122+ messages in thread
* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
@ 2004-08-31 20:10 Mark_H_Johnson
  2004-08-31 20:37 ` Ingo Molnar
  0 siblings, 1 reply; 122+ messages in thread
From: Mark_H_Johnson @ 2004-08-31 20:10 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: K.R. Foley, linux-kernel, Felipe Alfaro Solana, Daniel Schmitt,
	Lee Revell

>since the latency tracer does not trigger, we need a modified tracer to
>find out what's happening during such long delays. I've attached the
>'user-latency-tracer' patch ontop of -Q5, which is a modification of the
>latency tracer.
Grr. I should have checked before I built with this patch. With this in
I now get the
  kernel: Could not allocate 4 bytes percpu data
messages again. Need to increase that data area so
  #define PERCPU_ENOUGH_ROOM 196608
or something similar (should leave about 50K free for modules).

I will rebuild with this change plus the latest of the others.

--Mark H Johnson
  <mailto:Mark_H_Johnson@raytheon.com>


^ permalink raw reply	[flat|nested] 122+ messages in thread
[parent not found: <2yiVZ-IZ-15@gated-at.bofh.it>]
* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
@ 2004-09-01 14:37 Mark_H_Johnson
  2004-09-01 19:31 ` Takashi Iwai
  0 siblings, 1 reply; 122+ messages in thread
From: Mark_H_Johnson @ 2004-09-01 14:37 UTC (permalink / raw)
  To: Takashi Iwai
  Cc: Ingo Molnar, Lee Revell, Mark_H_Johnson, K.R. Foley, linux-kernel,
	Felipe Alfaro Solana, Daniel Schmitt, alsa-devel

>Ok, the second try.

This patch appears to work well. No snd_es1371 traces in over 25 minutes
of testing (I had a couple hundred yesterday in similar tests). The sound
was OK as well.

I am seeing some additional CPU overhead during the disk I/O tests with
today's kernel but I don't think this is due to this patch.

Thanks.
  --Mark


^ permalink raw reply	[flat|nested] 122+ messages in thread
* Re: [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5
@ 2004-09-01 15:21 Mark_H_Johnson
  2004-09-02 22:24 ` Ingo Molnar
  0 siblings, 1 reply; 122+ messages in thread
From: Mark_H_Johnson @ 2004-09-01 15:21 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: K.R. Foley, linux-kernel, Felipe Alfaro Solana, Daniel Schmitt,
	Lee Revell

>... Need to increase that data area so
>#define PERCPU_ENOUGH_ROOM 196608
>or something similar (should leave about 50K free for modules).
>
>I will rebuild with this change plus the latest of the others.

This booted fine today.

I reported the results of the audio patch separately. That seems to have
removed the audio latencies I saw previously.

A Whopper!
==========

A latency trace > 20 msec was seen (once). It starts pretty bad and then
gets stuck in a 1 msec loop.

preemption latency trace v1.0.2
-------------------------------
 latency: 23120 us, entries: 406 (406)
    -----------------
    | task: latencytest/4999, uid:0 nice:0 policy:1 rt_prio:99
    -----------------
 => started at: del_timer+0x4c/0x150
 => ended at:   del_timer+0xf2/0x150
=======>
00000001 0.000ms (+0.000ms): del_timer (del_singleshot_timer_sync)
00000001 0.700ms (+0.700ms): smp_apic_timer_interrupt (.text.lock.timer)
00010001 0.700ms (+0.000ms): profile_tick (smp_apic_timer_interrupt)
00010001 0.701ms (+0.000ms): profile_hook (profile_tick)
00010002 0.701ms (+0.000ms): notifier_call_chain (profile_hook)
00010001 0.702ms (+0.000ms): profile_hit (smp_apic_timer_interrupt)
00010001 0.702ms (+0.000ms): update_process_times
(smp_apic_timer_interrupt)
00010001 0.702ms (+0.000ms): update_one_process (update_process_times)
00010001 0.703ms (+0.000ms): run_local_timers (update_process_times)
00010001 0.703ms (+0.000ms): raise_softirq (update_process_times)
00010001 0.703ms (+0.000ms): scheduler_tick (update_process_times)
00010001 0.703ms (+0.000ms): sched_clock (scheduler_tick)
00010001 0.704ms (+0.001ms): rebalance_tick (scheduler_tick)
00000002 0.704ms (+0.000ms): do_softirq (smp_apic_timer_interrupt)
00000002 0.705ms (+0.000ms): __do_softirq (do_softirq)
00000002 0.705ms (+0.000ms): wake_up_process (do_softirq)
00000002 0.705ms (+0.000ms): try_to_wake_up (wake_up_process)
00000002 0.705ms (+0.000ms): task_rq_lock (try_to_wake_up)
00000003 0.706ms (+0.000ms): activate_task (try_to_wake_up)
00000003 0.706ms (+0.000ms): sched_clock (activate_task)
00000003 0.706ms (+0.000ms): recalc_task_prio (activate_task)
00000003 0.706ms (+0.000ms): effective_prio (recalc_task_prio)
00000003 0.706ms (+0.000ms): enqueue_task (activate_task)
00010001 1.301ms (+0.594ms): do_IRQ (.text.lock.timer)
00010002 1.302ms (+0.000ms): mask_and_ack_level_ioapic_irq (do_IRQ)
00010002 1.302ms (+0.000ms): mask_IO_APIC_irq
(mask_and_ack_level_ioapic_irq)
00010003 1.302ms (+0.000ms): __mask_IO_APIC_irq (mask_IO_APIC_irq)
00010003 1.302ms (+0.000ms): __modify_IO_APIC_irq (__mask_IO_APIC_irq)
00010002 1.315ms (+0.013ms): generic_redirect_hardirq (do_IRQ)
00010001 1.315ms (+0.000ms): generic_handle_IRQ_event (do_IRQ)
00010001 1.316ms (+0.000ms): usb_hcd_irq (generic_handle_IRQ_event)
00010001 1.316ms (+0.000ms): uhci_irq (usb_hcd_irq)
00010001 1.316ms (+0.000ms): usb_hcd_irq (generic_handle_IRQ_event)
00010001 1.317ms (+0.000ms): uhci_irq (usb_hcd_irq)
00010001 1.317ms (+0.000ms): snd_audiopci_interrupt
(generic_handle_IRQ_event)
00010001 1.319ms (+0.001ms): snd_pcm_period_elapsed
(snd_audiopci_interrupt)
00010003 1.319ms (+0.000ms): snd_ensoniq_playback1_pointer
(snd_pcm_period_elapsed)
00010003 1.321ms (+0.001ms): snd_pcm_playback_silence
(snd_pcm_period_elapsed)
00010003 1.321ms (+0.000ms): __wake_up (snd_pcm_period_elapsed)
00010004 1.321ms (+0.000ms): __wake_up_common (__wake_up)
00010004 1.322ms (+0.000ms): default_wake_function (__wake_up_common)
00010004 1.322ms (+0.000ms): try_to_wake_up (__wake_up_common)
00010004 1.322ms (+0.000ms): task_rq_lock (try_to_wake_up)
00010001 1.323ms (+0.000ms): kill_fasync (snd_pcm_period_elapsed)
00010002 1.323ms (+0.000ms): generic_note_interrupt (do_IRQ)
00010002 1.323ms (+0.000ms): end_level_ioapic_irq (do_IRQ)
00010002 1.323ms (+0.000ms): unmask_IO_APIC_irq (do_IRQ)
00010003 1.324ms (+0.000ms): __unmask_IO_APIC_irq (unmask_IO_APIC_irq)
00010003 1.324ms (+0.000ms): __modify_IO_APIC_irq (__unmask_IO_APIC_irq)
00000002 1.338ms (+0.013ms): do_softirq (do_IRQ)
00000002 1.338ms (+0.000ms): __do_softirq (do_softirq)
00000001 1.699ms (+0.361ms): smp_apic_timer_interrupt (.text.lock.timer)
00010001 1.700ms (+0.000ms): profile_tick (smp_apic_timer_interrupt)
00010001 1.700ms (+0.000ms): profile_hook (profile_tick)
00010002 1.700ms (+0.000ms): notifier_call_chain (profile_hook)
00010001 1.700ms (+0.000ms): profile_hit (smp_apic_timer_interrupt)
00010001 1.700ms (+0.000ms): update_process_times
(smp_apic_timer_interrupt)
00010001 1.701ms (+0.000ms): update_one_process (update_process_times)
00010001 1.701ms (+0.000ms): run_local_timers (update_process_times)
00010001 1.701ms (+0.000ms): raise_softirq (update_process_times)
00010001 1.701ms (+0.000ms): scheduler_tick (update_process_times)
00010001 1.701ms (+0.000ms): sched_clock (scheduler_tick)
00010001 1.702ms (+0.000ms): rebalance_tick (scheduler_tick)
00000002 1.702ms (+0.000ms): do_softirq (smp_apic_timer_interrupt)
00000002 1.702ms (+0.000ms): __do_softirq (do_softirq)
00000001 2.699ms (+0.996ms): smp_apic_timer_interrupt (.text.lock.timer)
00010001 2.699ms (+0.000ms): profile_tick (smp_apic_timer_interrupt)
00010001 2.699ms (+0.000ms): profile_hook (profile_tick)
00010002 2.699ms (+0.000ms): notifier_call_chain (profile_hook)
00010001 2.699ms (+0.000ms): profile_hit (smp_apic_timer_interrupt)
00010001 2.700ms (+0.000ms): update_process_times
(smp_apic_timer_interrupt)
00010001 2.700ms (+0.000ms): update_one_process (update_process_times)
00010001 2.700ms (+0.000ms): run_local_timers (update_process_times)
00010001 2.700ms (+0.000ms): raise_softirq (update_process_times)
00010001 2.700ms (+0.000ms): scheduler_tick (update_process_times)
00010001 2.701ms (+0.000ms): sched_clock (scheduler_tick)
00010001 2.701ms (+0.000ms): rebalance_tick (scheduler_tick)
00000002 2.701ms (+0.000ms): do_softirq (smp_apic_timer_interrupt)
00000002 2.701ms (+0.000ms): __do_softirq (do_softirq)
00010001 2.759ms (+0.057ms): do_IRQ (.text.lock.timer)
00010002 2.759ms (+0.000ms): mask_and_ack_level_ioapic_irq (do_IRQ)
00010002 2.759ms (+0.000ms): mask_IO_APIC_irq
(mask_and_ack_level_ioapic_irq)
00010003 2.760ms (+0.000ms): __mask_IO_APIC_irq (mask_IO_APIC_irq)
00010003 2.760ms (+0.000ms): __modify_IO_APIC_irq (__mask_IO_APIC_irq)
00010002 2.773ms (+0.013ms): generic_redirect_hardirq (do_IRQ)
00010001 2.773ms (+0.000ms): generic_handle_IRQ_event (do_IRQ)
00010001 2.773ms (+0.000ms): usb_hcd_irq (generic_handle_IRQ_event)
00010001 2.774ms (+0.000ms): uhci_irq (usb_hcd_irq)
00010001 2.774ms (+0.000ms): usb_hcd_irq (generic_handle_IRQ_event)
00010001 2.774ms (+0.000ms): uhci_irq (usb_hcd_irq)
00010001 2.775ms (+0.000ms): snd_audiopci_interrupt
(generic_handle_IRQ_event)
00010001 2.776ms (+0.001ms): snd_pcm_period_elapsed
(snd_audiopci_interrupt)
00010003 2.777ms (+0.000ms): snd_ensoniq_playback1_pointer
(snd_pcm_period_elapsed)
00010003 2.778ms (+0.001ms): snd_pcm_playback_silence
(snd_pcm_period_elapsed)
00010003 2.779ms (+0.001ms): snd_pcm_format_set_silence
(snd_pcm_playback_silence)
00010003 2.780ms (+0.000ms): snd_pcm_format_set_silence
(snd_pcm_playback_silence)
00010003 2.781ms (+0.000ms): xrun (snd_pcm_period_elapsed)
00010003 2.781ms (+0.000ms): snd_pcm_stop (xrun)
00010003 2.782ms (+0.000ms): snd_pcm_action (snd_pcm_stop)
00010003 2.783ms (+0.000ms): snd_pcm_action_single (snd_pcm_action)
00010003 2.783ms (+0.000ms): snd_pcm_pre_stop (snd_pcm_action_single)
00010003 2.783ms (+0.000ms): snd_pcm_do_stop (snd_pcm_action_single)
00010003 2.784ms (+0.000ms): snd_ensoniq_trigger (snd_pcm_do_stop)
00010003 2.786ms (+0.001ms): snd_pcm_post_stop (snd_pcm_action_single)
00010003 2.786ms (+0.000ms): snd_pcm_trigger_tstamp (snd_pcm_post_stop)
00010003 2.787ms (+0.000ms): do_gettimeofday (snd_pcm_trigger_tstamp)
00010003 2.787ms (+0.000ms): get_offset_tsc (do_gettimeofday)
00010003 2.788ms (+0.000ms): snd_timer_notify (snd_pcm_post_stop)
00010003 2.789ms (+0.001ms): snd_pcm_tick_set (snd_pcm_post_stop)
00010003 2.790ms (+0.000ms): snd_pcm_system_tick_set (snd_pcm_post_stop)
00010003 2.790ms (+0.000ms): del_timer (snd_pcm_post_stop)
00010003 2.791ms (+0.000ms): __wake_up (snd_pcm_action_single)
00010004 2.791ms (+0.000ms): __wake_up_common (__wake_up)
00010004 2.791ms (+0.000ms): default_wake_function (__wake_up_common)
00010004 2.791ms (+0.000ms): try_to_wake_up (__wake_up_common)
00010004 2.791ms (+0.000ms): task_rq_lock (try_to_wake_up)
00010001 2.792ms (+0.000ms): kill_fasync (snd_pcm_period_elapsed)
00010002 2.793ms (+0.000ms): generic_note_interrupt (do_IRQ)
00010002 2.793ms (+0.000ms): end_level_ioapic_irq (do_IRQ)
00010002 2.793ms (+0.000ms): unmask_IO_APIC_irq (do_IRQ)
00010003 2.793ms (+0.000ms): __unmask_IO_APIC_irq (unmask_IO_APIC_irq)
00010003 2.793ms (+0.000ms): __modify_IO_APIC_irq (__unmask_IO_APIC_irq)
00000002 2.807ms (+0.013ms): do_softirq (do_IRQ)
00000002 2.807ms (+0.000ms): __do_softirq (do_softirq)
00000001 3.698ms (+0.890ms): smp_apic_timer_interrupt (.text.lock.timer)
00010001 3.698ms (+0.000ms): profile_tick (smp_apic_timer_interrupt)
00010001 3.698ms (+0.000ms): profile_hook (profile_tick)
00010002 3.699ms (+0.000ms): notifier_call_chain (profile_hook)
00010001 3.699ms (+0.000ms): profile_hit (smp_apic_timer_interrupt)
00010001 3.699ms (+0.000ms): update_process_times
(smp_apic_timer_interrupt)
00010001 3.699ms (+0.000ms): update_one_process (update_process_times)
00010001 3.700ms (+0.000ms): run_local_timers (update_process_times)
00010001 3.700ms (+0.000ms): raise_softirq (update_process_times)
00010001 3.700ms (+0.000ms): scheduler_tick (update_process_times)
00010001 3.700ms (+0.000ms): sched_clock (scheduler_tick)
00010001 3.701ms (+0.000ms): rebalance_tick (scheduler_tick)
00000002 3.701ms (+0.000ms): do_softirq (smp_apic_timer_interrupt)
00000002 3.701ms (+0.000ms): __do_softirq (do_softirq)
00000001 4.697ms (+0.996ms): smp_apic_timer_interrupt (.text.lock.timer)
00010001 4.698ms (+0.000ms): profile_tick (smp_apic_timer_interrupt)
00010001 4.698ms (+0.000ms): profile_hook (profile_tick)
00010002 4.698ms (+0.000ms): notifier_call_chain (profile_hook)
00010001 4.698ms (+0.000ms): profile_hit (smp_apic_timer_interrupt)
00010001 4.698ms (+0.000ms): update_process_times
(smp_apic_timer_interrupt)
00010001 4.699ms (+0.000ms): update_one_process (update_process_times)
00010001 4.699ms (+0.000ms): run_local_timers (update_process_times)
00010001 4.699ms (+0.000ms): raise_softirq (update_process_times)
00010001 4.699ms (+0.000ms): scheduler_tick (update_process_times)
00010001 4.699ms (+0.000ms): sched_clock (scheduler_tick)
00010001 4.700ms (+0.000ms): rebalance_tick (scheduler_tick)
00000002 4.700ms (+0.000ms): do_softirq (smp_apic_timer_interrupt)
00000002 4.700ms (+0.000ms): __do_softirq (do_softirq)
00000001 5.697ms (+0.996ms): smp_apic_timer_interrupt (.text.lock.timer)
00010001 5.697ms (+0.000ms): profile_tick (smp_apic_timer_interrupt)
00010001 5.697ms (+0.000ms): profile_hook (profile_tick)
00010002 5.697ms (+0.000ms): notifier_call_chain (profile_hook)
00010001 5.698ms (+0.000ms): profile_hit (smp_apic_timer_interrupt)
00010001 5.698ms (+0.000ms): update_process_times
(smp_apic_timer_interrupt)
00010001 5.698ms (+0.000ms): update_one_process (update_process_times)
00010001 5.698ms (+0.000ms): run_local_timers (update_process_times)
00010001 5.698ms (+0.000ms): raise_softirq (update_process_times)
00010001 5.698ms (+0.000ms): scheduler_tick (update_process_times)
(this cycle repeats several times, exited as follows)
00000001 21.686ms (+0.996ms): smp_apic_timer_interrupt (.text.lock.timer)
00010001 21.686ms (+0.000ms): profile_tick (smp_apic_timer_interrupt)
00010001 21.686ms (+0.000ms): profile_hook (profile_tick)
00010002 21.686ms (+0.000ms): notifier_call_chain (profile_hook)
00010001 21.687ms (+0.000ms): profile_hit (smp_apic_timer_interrupt)
00010001 21.687ms (+0.000ms): update_process_times
(smp_apic_timer_interrupt)
00010001 21.687ms (+0.000ms): update_one_process (update_process_times)
00010001 21.687ms (+0.000ms): run_local_timers (update_process_times)
00010001 21.687ms (+0.000ms): raise_softirq (update_process_times)
00010001 21.687ms (+0.000ms): scheduler_tick (update_process_times)
00010001 21.688ms (+0.000ms): sched_clock (scheduler_tick)
00010001 21.688ms (+0.000ms): rebalance_tick (scheduler_tick)
00000002 21.688ms (+0.000ms): do_softirq (smp_apic_timer_interrupt)
00000002 21.688ms (+0.000ms): __do_softirq (do_softirq)
00000001 22.685ms (+0.996ms): smp_apic_timer_interrupt (.text.lock.timer)
00010001 22.685ms (+0.000ms): profile_tick (smp_apic_timer_interrupt)
00010001 22.685ms (+0.000ms): profile_hook (profile_tick)
00010002 22.686ms (+0.000ms): notifier_call_chain (profile_hook)
00010001 22.686ms (+0.000ms): profile_hit (smp_apic_timer_interrupt)
00010001 22.686ms (+0.000ms): update_process_times
(smp_apic_timer_interrupt)
00010001 22.686ms (+0.000ms): update_one_process (update_process_times)
00010001 22.686ms (+0.000ms): run_local_timers (update_process_times)
00010001 22.686ms (+0.000ms): raise_softirq (update_process_times)
00010001 22.687ms (+0.000ms): scheduler_tick (update_process_times)
00010001 22.687ms (+0.000ms): sched_clock (scheduler_tick)
00010001 22.687ms (+0.000ms): rebalance_tick (scheduler_tick)
00000002 22.687ms (+0.000ms): do_softirq (smp_apic_timer_interrupt)
00000002 22.688ms (+0.000ms): __do_softirq (do_softirq)
00000001 23.120ms (+0.432ms): sub_preempt_count (del_timer)
00000001 23.121ms (+0.000ms): update_max_trace (check_preempt_timing)

TSC mcount
==========

From your patch, I added several mcount() calls to mark_offset_tsc.
To summarize the trace results, here is a table that reports the
delta times for each location. Each row represents one of the dozen
trace outputs per latency trace. Row columns are the file names
(lt.xx) in the tar file. Times are in usec.

     01  03  04  13  16  26  27  31  32  35  37  39
01  000 000 000 069 000 000 000 000 000 081 136 000
02  032 000 000 000 000 000 000 000 000 000 000 000
03  000 000 000 000 000 000 000 000 000 000 000 000
04  001 000 000 070 231 139 138 093 252 062 000 067
05  000 000 000 000 000 000 000 000 000 000 000 000
06  042 003 003 004 003 004 004 053 145 076 003 004
07  004 004 004 004 008 004 005 006 010 011 004 005
08  001 001 002 002 008 002 002 002 001 002 001 002
09  000 000 000 000 000 000 000 000 000 000 000 000
10  000 000 000 000 000 000 000 000 000 000 000 000
11  000 000 000 000 000 000 000 000 000 000 000 000
12  000 000 000 061 000 130 129 129 000 000 000 060

NOTE: This is not all the results w/ mark_offset_tsc listed. After the
first few items, I only listed those with significant (>100 usec) delays.

Network Poll
============

I still have a few traces w/ long durations during network poll. Since
you merged that other change into Q6 (and I assume Q7) I will try that
later today.

I will also try to make a modified version of latencytest that does
the extended trace of the write system call (occasionally) to see what
is going on there as well.

For reference, I will send the tar file with all the traces in another
message (not to linux-kernel).

 --Mark


^ permalink raw reply	[flat|nested] 122+ messages in thread
[parent not found: <OFD220F58F.002C5901-ON86256F02.005C2FB1-86256F02.005C2FD5@raytheon.com>]

end of thread, other threads:[~2004-09-02 22:24 UTC | newest]

Thread overview: 122+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-08-23 22:18 [patch] PPC/PPC64 port of voluntary preempt patch Scott Wood
2004-08-24  6:14 ` [patch] voluntary-preempt-2.6.8.1-P9 Ingo Molnar
2004-08-24 17:43   ` K.R. Foley
2004-08-24 20:32     ` Lee Revell
2004-08-24 20:53       ` Scott Wood
2004-08-24 19:20   ` K.R. Foley
2004-08-24 22:47   ` Lee Revell
2004-08-25  2:00   ` Lee Revell
2004-08-25  3:17   ` K.R. Foley
2004-08-25  3:22     ` Lee Revell
2004-08-25 14:34       ` K.R. Foley
2004-08-25 16:00       ` K.R. Foley
2004-08-25  3:26   ` K.R. Foley
2004-08-25  9:58   ` [patch] voluntary-preempt-2.6.8.1-P9 : oprofile latency at 3.3ms P.O. Gaillard
2004-08-26 21:39   ` [patch] voluntary-preempt-2.6.8.1-P9 Lee Revell
2004-08-27 16:54     ` Lee Revell
2004-08-28  7:37       ` Ingo Molnar
2004-08-28 15:10         ` Lee Revell
2004-08-28 12:14     ` Ingo Molnar
2004-08-30  9:27       ` voluntary-preempt-2.6.8.1-P9 : big latency when logging on console P.O. Gaillard
2004-08-30  9:41         ` Ingo Molnar
2004-08-30 12:25           ` P.O. Gaillard
2004-08-30  9:48       ` [patch] voluntary-preempt-2.6.8.1-P9 : a few submillisecond latencies P.O. Gaillard
2004-08-28 12:03   ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q0 Ingo Molnar
2004-08-28 16:18     ` Felipe Alfaro Solana
2004-08-28 16:50       ` K.R. Foley
2004-08-28 17:52         ` Lee Revell
2004-08-28 19:44           ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q2 Ingo Molnar
2004-08-28 20:01             ` Lee Revell
2004-08-28 20:04               ` Ingo Molnar
2004-08-28 20:08                 ` Lee Revell
2004-08-28 20:10             ` Daniel Schmitt
2004-08-28 20:31               ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3 Ingo Molnar
2004-08-28 21:10                 ` Lee Revell
2004-08-28 21:13                   ` Ingo Molnar
2004-08-28 21:16                     ` Lee Revell
2004-08-28 23:51                       ` Lee Revell
2004-08-29  2:35                         ` Lee Revell
2004-08-29  5:43                           ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q4 Ingo Molnar
2004-08-29  6:57                             ` Lee Revell
2004-08-29 18:01                               ` Ingo Molnar
2004-08-29 19:06                               ` Ingo Molnar
2004-08-30  0:47                                 ` K.R. Foley
2004-08-30  3:42                                   ` K.R. Foley
2004-08-30 13:06                                   ` Alan Cox
2004-08-30 17:37                                     ` Ingo Molnar
2004-08-31 16:39                                     ` K.R. Foley
2004-08-30  9:06                             ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5 Ingo Molnar
2004-08-30 14:25                               ` Thomas Charbonnel
2004-08-30 18:00                                 ` Ingo Molnar
2004-08-31 19:23                                   ` Thomas Charbonnel
2004-08-31 19:30                                     ` Ingo Molnar
2004-08-31 19:45                                       ` Thomas Charbonnel
2004-08-31  6:40                               ` Lee Revell
2004-08-31  6:53                                 ` Ingo Molnar
2004-08-31 23:03                                   ` Lee Revell
2004-09-01 15:52                                     ` Martin Josefsson
2004-09-01 21:15                                       ` Lee Revell
2004-09-01 21:30                                       ` Lee Revell
2004-08-31  7:06                                 ` Ingo Molnar
2004-08-31 19:21                                   ` Lee Revell
2004-08-31 19:37                                     ` Ingo Molnar
2004-08-31 19:47                                       ` Lee Revell
2004-08-31 19:51                                         ` Ingo Molnar
2004-08-31 20:09                                           ` Ingo Molnar
2004-08-31 20:10                                             ` Lee Revell
2004-08-31 20:14                                               ` Ingo Molnar
2004-08-31 20:20                                                 ` Ingo Molnar
2004-08-31 20:34                                                   ` Lee Revell
2004-08-31 20:39                                                     ` Ingo Molnar
2004-08-31 20:41                                                       ` Lee Revell
2004-08-31 17:40                               ` Peter Zijlstra
2004-09-01  1:43                               ` Lee Revell
2004-09-01  2:30                               ` Lee Revell
2004-09-01  7:27                               ` Lee Revell
2004-09-01  8:29                               ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q6 Ingo Molnar
2004-09-01 13:51                                 ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q7 Ingo Molnar
2004-09-01 17:09                                   ` Thomas Charbonnel
2004-09-01 19:03                                     ` K.R. Foley
2004-09-01 20:11                                     ` Peter Zijlstra
2004-09-01 20:16                                       ` Lee Revell
2004-09-01 20:53                                       ` K.R. Foley
     [not found]                                   ` <41367E5D.3040605@cybsft.com>
2004-09-02  5:37                                     ` Ingo Molnar
2004-09-02  5:40                                       ` Ingo Molnar
2004-08-30 12:52                   ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q3 Ingo Molnar
2004-08-29  7:40                 ` Matt Heler
2004-08-24 19:51 ` [patch] PPC/PPC64 port of voluntary preempt patch Scott Wood
2004-08-26  3:17   ` Lee Revell
2004-08-26 16:38     ` Scott Wood
2004-08-27  1:18     ` Fernando Pablo Lopez-Lezcano
2004-08-28 12:36   ` Ingo Molnar
2004-08-28 13:01     ` [patch] voluntary-preempt-2.6.9-rc1-bk4-Q1 Ingo Molnar
2004-08-30  1:06       ` Fernando Pablo Lopez-Lezcano
  -- strict thread matches above, loose matches on Subject: below --
2004-08-30 19:13 [patch] voluntary-preempt-2.6.9-rc1-bk4-Q5 Mark_H_Johnson
2004-08-30 19:21 ` Ingo Molnar
2004-08-31  8:49 ` Ingo Molnar
2004-09-02  6:33 ` Ingo Molnar
2004-08-30 22:04 Mark_H_Johnson
2004-08-31  6:31 ` Ingo Molnar
2004-09-01  7:30 ` Ingo Molnar
2004-08-31 12:46 Mark_H_Johnson
2004-08-31 15:17 Mark_H_Johnson
2004-08-31 17:20 ` Lee Revell
2004-08-31 18:09   ` Lee Revell
2004-08-31 18:53     ` Takashi Iwai
2004-08-31 18:56       ` Ingo Molnar
2004-09-02 16:59         ` Jaroslav Kysela
2004-09-02 17:50           ` Lee Revell
2004-08-31 18:19   ` Takashi Iwai
2004-08-31 18:48     ` Ingo Molnar
2004-08-31 19:02       ` Takashi Iwai
2004-08-31 18:50   ` Ingo Molnar
2004-08-31 20:10 Mark_H_Johnson
2004-08-31 20:37 ` Ingo Molnar
     [not found] <2yiVZ-IZ-15@gated-at.bofh.it>
     [not found] ` <2ylhi-2hg-3@gated-at.bofh.it>
     [not found]   ` <2ynLU-42D-7@gated-at.bofh.it>
     [not found]     ` <2yqJJ-5ZL-1@gated-at.bofh.it>
     [not found]       ` <2yQkS-6Zh-13@gated-at.bofh.it>
     [not found]         ` <2zaCV-4FE-3@gated-at.bofh.it>
     [not found]           ` <2zaWk-4Yj-9@gated-at.bofh.it>
     [not found]             ` <2zmE8-4Zz-11@gated-at.bofh.it>
     [not found]               ` <2zngP-5wD-9@gated-at.bofh.it>
     [not found]                 ` <2zngP-5wD-7@gated-at.bofh.it>
     [not found]                   ` <2znJS-5Pm-25@gated-at.bofh.it>
2004-08-31 23:06                     ` Andi Kleen
     [not found]                     ` <2znJS-5Pm-27@gated-at.bofh.it>
     [not found]                       ` <2znJS-5Pm-29@gated-at.bofh.it>
     [not found]                         ` <2znJS-5Pm-31@gated-at.bofh.it>
     [not found]                           ` <2znJS-5Pm-33@gated-at.bofh.it>
2004-08-31 23:10                             ` Andi Kleen
2004-09-01  7:05                               ` Ingo Molnar
2004-09-01 14:37 Mark_H_Johnson
2004-09-01 19:31 ` Takashi Iwai
2004-09-01 15:21 Mark_H_Johnson
2004-09-02 22:24 ` Ingo Molnar
     [not found] <OFD220F58F.002C5901-ON86256F02.005C2FB1-86256F02.005C2FD5@raytheon.com>
2004-09-01 17:09 ` Ingo Molnar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox