All of lore.kernel.org
 help / color / mirror / Atom feed
* [Xenomai-core] [PREVIEW] ipipe-2.6.23-i386-1.10-07
@ 2007-10-13  9:10 Jan Kiszka
  0 siblings, 0 replies; only message in thread
From: Jan Kiszka @ 2007-10-13  9:10 UTC (permalink / raw)
  To: adeos-main; +Cc: Xenomai-core


[-- Attachment #1.1: Type: text/plain, Size: 528 bytes --]

This is a forward port of the latest ipipe patch for 2.6.22 to 2.6.23,
additionally based on Philippe's earlier work for -rc2. It runs
surprisingly well here, so I would like to invite more testers to the
party. You need Xenomai SVN head + my timer setup fix [1] to create a
test platform.

I had to perform one small magic dance to get the patch compiling due to
inclusion hell around ipipe_base.h, check
arch/i386/boot/compressed/Makefile.

Jan

[1] https://mail.gna.org/public/xenomai-core/2007-10/msg00068.html

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1.2: adeos-ipipe-2.6.23-i386-1.10-07.patch --]
[-- Type: text/x-patch; name="adeos-ipipe-2.6.23-i386-1.10-07.patch", Size: 252687 bytes --]

Index: linux-2.6.23/Makefile
===================================================================
--- linux-2.6.23.orig/Makefile
+++ linux-2.6.23/Makefile
@@ -491,6 +491,10 @@ endif
 
 include $(srctree)/arch/$(ARCH)/Makefile
 
+ifdef CONFIG_IPIPE_TRACE_MCOUNT
+CFLAGS          += -pg
+endif
+
 ifdef CONFIG_FRAME_POINTER
 CFLAGS		+= -fno-omit-frame-pointer -fno-optimize-sibling-calls
 else
Index: linux-2.6.23/arch/i386/Kconfig
===================================================================
--- linux-2.6.23.orig/arch/i386/Kconfig
+++ linux-2.6.23/arch/i386/Kconfig
@@ -217,7 +217,7 @@ endchoice
 config PARAVIRT
 	bool "Paravirtualization support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
-	depends on !(X86_VISWS || X86_VOYAGER)
+	depends on !(X86_VISWS || X86_VOYAGER || IPIPE)
 	help
 	  Paravirtualization is a way of running multiple instances of
 	  Linux on the same machine, under a hypervisor.  This option
@@ -315,6 +315,8 @@ config SCHED_MC
 
 source "kernel/Kconfig.preempt"
 
+source "kernel/ipipe/Kconfig"
+
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
 	depends on !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH)
Index: linux-2.6.23/arch/i386/kernel/Makefile
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/Makefile
+++ linux-2.6.23/arch/i386/kernel/Makefile
@@ -32,6 +32,8 @@ obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit.
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_MODULES)		+= module.o
 obj-y				+= sysenter.o vsyscall.o
+obj-$(CONFIG_IPIPE)		+= ipipe.o
+obj-$(CONFIG_IPIPE_TRACE_MCOUNT)	+= mcount.o
 obj-$(CONFIG_ACPI_SRAT) 	+= srat.o
 obj-$(CONFIG_EFI) 		+= efi.o efi_stub.o
 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault.o
Index: linux-2.6.23/arch/i386/kernel/apic.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/apic.c
+++ linux-2.6.23/arch/i386/kernel/apic.c
@@ -250,7 +250,7 @@ static void lapic_timer_setup(enum clock
 	if (!local_apic_timer_verify_ok)
 		return;
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 
 	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
@@ -269,7 +269,7 @@ static void lapic_timer_setup(enum clock
 		break;
 	}
 
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 /*
@@ -716,13 +716,18 @@ void lapic_shutdown(void)
 	if (!cpu_has_apic)
 		return;
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	clear_local_APIC();
 
 	if (enabled_via_apicbase)
 		disable_local_APIC();
 
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
+}
+
+int __ipipe_check_lapic(void)
+{
+	return !(lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY);
 }
 
 /*
@@ -1272,7 +1277,7 @@ void smp_spurious_interrupt(struct pt_re
 	 */
 	v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
 	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
-		ack_APIC_irq();
+		__ack_APIC_irq();
 
 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
 	printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
@@ -1329,6 +1334,12 @@ void __init apic_intr_init(void)
 #ifdef CONFIG_X86_MCE_P4THERMAL
 	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 #endif
+#ifdef CONFIG_IPIPE
+	set_intr_gate(IPIPE_SERVICE_VECTOR0, ipipe_ipi0);
+	set_intr_gate(IPIPE_SERVICE_VECTOR1, ipipe_ipi1);
+	set_intr_gate(IPIPE_SERVICE_VECTOR2, ipipe_ipi2);
+	set_intr_gate(IPIPE_SERVICE_VECTOR3, ipipe_ipi3);
+#endif
 }
 
 /**
@@ -1467,9 +1478,9 @@ static int lapic_suspend(struct sys_devi
 		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
 #endif
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	disable_local_APIC();
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 	return 0;
 }
 
@@ -1484,7 +1495,7 @@ static int lapic_resume(struct sys_devic
 
 	maxlvt = lapic_get_maxlvt();
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 
 	/*
 	 * Make sure the APICBASE points to the right address
@@ -1519,7 +1530,7 @@ static int lapic_resume(struct sys_devic
 	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
 	apic_write(APIC_ESR, 0);
 	apic_read(APIC_ESR);
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 	return 0;
 }
 
Index: linux-2.6.23/arch/i386/kernel/entry.S
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/entry.S
+++ linux-2.6.23/arch/i386/kernel/entry.S
@@ -44,6 +44,7 @@
 #include <linux/linkage.h>
 #include <asm/thread_info.h>
 #include <asm/irqflags.h>
+#include <asm/ipipe_base.h>
 #include <asm/errno.h>
 #include <asm/segment.h>
 #include <asm/smp.h>
@@ -75,6 +76,58 @@ DF_MASK		= 0x00000400 
 NT_MASK		= 0x00004000
 VM_MASK		= 0x00020000
 
+#ifdef CONFIG_IPIPE
+#define EMULATE_ROOT_IRET(bypass) \
+				call __ipipe_unstall_iret_root ; \
+				TRACE_IRQS_ON ; \
+				bypass: \
+				movl PT_EAX(%esp),%eax
+#define TEST_PREEMPTIBLE(regs)  call __ipipe_kpreempt_root ; testl %eax,%eax
+#define CATCH_ROOT_SYSCALL(bypass1,bypass2)	\
+				call __ipipe_syscall_root ; \
+				testl  %eax,%eax ; \
+				js    bypass1 ; \
+				jne   bypass2 ; \
+				movl PT_ORIG_EAX(%esp),%eax
+#define PUSH_XCODE(v)		pushl $ ex_ ## v
+#define PUSH_XVEC(v)		pushl $ ex_ ## v
+#define HANDLE_EXCEPTION(code)	movl %code,%ecx ; \
+				call __ipipe_handle_exception ; \
+				testl %eax,%eax	; \
+				jnz restore_nocheck_notrace
+#define DIVERT_EXCEPTION(code)	movl $(__USER_DS), %ecx	; \
+				movl %ecx, %ds ; \
+				movl %ecx, %es ; \
+				movl %esp, %eax	; \
+				movl $ex_ ## code,%edx ; \
+				call __ipipe_divert_exception ; \
+				testl %eax,%eax	; \
+				jnz restore_nocheck_notrace
+
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+# define IPIPE_TRACE_IRQ_ENTER \
+	lea PT_EIP-4(%esp), %ebp; \
+	movl PT_ORIG_EAX(%esp), %eax; \
+	call ipipe_trace_begin
+# define IPIPE_TRACE_IRQ_EXIT \
+	pushl %eax; \
+	movl PT_ORIG_EAX+4(%esp), %eax; \
+	call ipipe_trace_end; \
+	popl %eax
+#else  /* !CONFIG_IPIPE_TRACE_IRQSOFF */
+#define IPIPE_TRACE_IRQ_ENTER
+#define IPIPE_TRACE_IRQ_EXIT
+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
+#else /* !CONFIG_IPIPE */
+#define EMULATE_ROOT_IRET(bypass)
+#define TEST_PREEMPTIBLE(regs)		testl $IF_MASK,PT_EFLAGS(regs)
+#define CATCH_ROOT_SYSCALL(bypass1,bypass2)
+#define PUSH_XCODE(v)			pushl $v
+#define PUSH_XVEC(v)			pushl v
+#define HANDLE_EXCEPTION(code)		call *%code
+#define DIVERT_EXCEPTION(code)
+#endif /* CONFIG_IPIPE */
+
 #ifdef CONFIG_PREEMPT
 #define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
 #else
@@ -214,6 +267,7 @@ VM_MASK		= 0x00020000
 	CFI_OFFSET ebx, PT_EBX-PT_OLDESP
 
 ENTRY(ret_from_fork)
+	ENABLE_INTERRUPTS_HW_COND
 	CFI_STARTPROC
 	pushl %eax
 	CFI_ADJUST_CFA_OFFSET 4
@@ -241,7 +295,7 @@ END(ret_from_fork)
 	RING0_PTREGS_FRAME
 ret_from_exception:
 	preempt_stop(CLBR_ANY)
-ret_from_intr:
+ENTRY(ret_from_intr)
 	GET_THREAD_INFO(%ebp)
 check_userspace:
 	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
@@ -263,14 +317,14 @@ END(ret_from_exception)
 
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
-	DISABLE_INTERRUPTS(CLBR_ANY)
+	DISABLE_INTERRUPTS_HW(CLBR_ANY)
 	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
 	jnz restore_nocheck
 need_resched:
 	movl TI_flags(%ebp), %ecx	# need_resched set ?
 	testb $_TIF_NEED_RESCHED, %cl
 	jz restore_all
-	testl $IF_MASK,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
+ 	TEST_PREEMPTIBLE(%esp)		# interrupts off (exception path) ?
 	jz restore_all
 	call preempt_schedule_irq
 	jmp need_resched
@@ -293,7 +347,7 @@ sysenter_past_esp:
 	 * No need to follow this irqs on/off section: the syscall
 	 * disabled irqs and here we enable it straight after entry:
 	 */
-	ENABLE_INTERRUPTS(CLBR_NONE)
+	ENABLE_INTERRUPTS_HW(CLBR_NONE)
 	pushl $(__USER_DS)
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET ss, 0*/
@@ -330,6 +384,7 @@ sysenter_past_esp:
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
+	CATCH_ROOT_SYSCALL(sysenter_tail,sysenter_exit)
 
 	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
 	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
@@ -338,16 +393,20 @@ sysenter_past_esp:
 	jae syscall_badsys
 	call *sys_call_table(,%eax,4)
 	movl %eax,PT_EAX(%esp)
+sysenter_tail:
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx
 	jne syscall_exit_work
 /* if something modifies registers it must also disable sysexit */
+	EMULATE_ROOT_IRET(sysenter_exit)
 	movl PT_EIP(%esp), %edx
 	movl PT_OLDESP(%esp), %ecx
 	xorl %ebp,%ebp
-	TRACE_IRQS_ON
+#ifndef CONFIG_IPIPE
+  	TRACE_IRQS_ON
+#endif
 1:	mov  PT_FS(%esp), %fs
 	ENABLE_INTERRUPTS_SYSEXIT
 	CFI_ENDPROC
@@ -367,6 +426,7 @@ ENTRY(system_call)
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
+ 	CATCH_ROOT_SYSCALL(syscall_exit,restore_nocheck_notrace)
 					# system call tracing in operation / emulation
 	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
 	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
@@ -401,7 +461,11 @@ restore_all:
 	CFI_REMEMBER_STATE
 	je ldt_ss			# returning to user-space with LDT SS
 restore_nocheck:
+#ifdef CONFIG_IPIPE
+	call __ipipe_unstall_iret_root
+#else /* !CONFIG_IPIPE */
 	TRACE_IRQS_IRET
+#endif /* CONFIG_IPIPE */
 restore_nocheck_notrace:
 	RESTORE_REGS
 	addl $4, %esp			# skip orig_eax/error_code
@@ -410,7 +474,7 @@ restore_nocheck_notrace:
 .section .fixup,"ax"
 iret_exc:
 	pushl $0			# no error code
-	pushl $do_iret_error
+	PUSH_XCODE(do_iret_error)
 	jmp error_code
 .previous
 .section __ex_table,"a"
@@ -451,7 +515,7 @@ ldt_ss:
 	CFI_ADJUST_CFA_OFFSET 4
 	pushl %eax
 	CFI_ADJUST_CFA_OFFSET 4
-	DISABLE_INTERRUPTS(CLBR_EAX)
+	DISABLE_INTERRUPTS_HW(CLBR_EAX)
 	TRACE_IRQS_OFF
 	lss (%esp), %esp
 	CFI_ADJUST_CFA_OFFSET -8
@@ -466,6 +530,7 @@ work_pending:
 	testb $_TIF_NEED_RESCHED, %cl
 	jz work_notifysig
 work_resched:
+	ENABLE_INTERRUPTS_HW_COND
 	call schedule
 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
@@ -605,6 +670,47 @@ END(irq_entries_start)
 END(interrupt)
 .previous
 
+#ifdef CONFIG_IPIPE
+	ALIGN
+common_interrupt:
+	SAVE_ALL
+	IPIPE_TRACE_IRQ_ENTER
+	call __ipipe_handle_irq
+	IPIPE_TRACE_IRQ_EXIT
+	testl %eax,%eax
+	jnz  ret_from_intr
+	RESTORE_REGS
+	addl $4, %esp
+	iret
+	CFI_ENDPROC
+
+#define BUILD_INTERRUPT(name, nr)	\
+ENTRY(name)				\
+	RING0_INT_FRAME;		\
+	pushl $~(nr-FIRST_SYSTEM_VECTOR+NR_IRQS);\
+	CFI_ADJUST_CFA_OFFSET 4;	\
+	SAVE_ALL;			\
+	IPIPE_TRACE_IRQ_ENTER;	\
+	call __ipipe_handle_irq;	\
+	IPIPE_TRACE_IRQ_EXIT;		\
+	testl %eax,%eax;		\
+	jnz  ret_from_intr;		\
+	RESTORE_REGS;			\
+	addl $4, %esp;			\
+	iret;			 	\
+	CFI_ENDPROC
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	BUILD_INTERRUPT(ipipe_ipi0,IPIPE_SERVICE_VECTOR0)
+	BUILD_INTERRUPT(ipipe_ipi1,IPIPE_SERVICE_VECTOR1)
+	BUILD_INTERRUPT(ipipe_ipi2,IPIPE_SERVICE_VECTOR2)
+	BUILD_INTERRUPT(ipipe_ipi3,IPIPE_SERVICE_VECTOR3)
+#ifdef CONFIG_SMP
+	BUILD_INTERRUPT(ipipe_ipiX,IPIPE_CRITICAL_VECTOR)
+#endif
+#endif
+
+#else /* !CONFIG_IPIPE */
 /*
  * the CPU automatically disables interrupts when executing an IRQ vector,
  * so IRQ-flags tracing has to follow that:
@@ -632,12 +738,14 @@ ENTRY(name)				\
 	CFI_ENDPROC;			\
 ENDPROC(name)
 
+#endif /* !CONFIG_IPIPE */
+
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 
 KPROBE_ENTRY(page_fault)
 	RING0_EC_FRAME
-	pushl $do_page_fault
+	PUSH_XCODE(do_page_fault)
 	CFI_ADJUST_CFA_OFFSET 4
 	ALIGN
 error_code:
@@ -688,7 +796,7 @@ error_code:
 	movl %ecx, %ds
 	movl %ecx, %es
 	movl %esp,%eax			# pt_regs pointer
-	call *%edi
+	HANDLE_EXCEPTION(edi)
 	jmp ret_from_exception
 	CFI_ENDPROC
 KPROBE_END(page_fault)
@@ -697,7 +805,7 @@ ENTRY(coprocessor_error)
 	RING0_INT_FRAME
 	pushl $0
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_coprocessor_error
+	PUSH_XCODE(do_coprocessor_error)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -707,7 +815,7 @@ ENTRY(simd_coprocessor_error)
 	RING0_INT_FRAME
 	pushl $0
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_simd_coprocessor_error
+	PUSH_XCODE(do_simd_coprocessor_error)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -718,6 +826,7 @@ ENTRY(device_not_available)
 	pushl $-1			# mark this as an int
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
+	DIVERT_EXCEPTION(device_not_available)
 	GET_CR0_INTO_EAX
 	testl $0x4, %eax		# EM (math emulation bit)
 	jne device_not_available_emulate
@@ -771,6 +880,7 @@ debug_stack_correct:
 	pushl $-1			# mark this as an int
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
+	DIVERT_EXCEPTION(do_debug)
 	xorl %edx,%edx			# error code 0
 	movl %esp,%eax			# pt_regs pointer
 	call do_debug
@@ -889,6 +999,7 @@ KPROBE_ENTRY(int3)
 	pushl $-1			# mark this as an int
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
+	DIVERT_EXCEPTION(do_int3)
 	xorl %edx,%edx		# zero error code
 	movl %esp,%eax		# pt_regs pointer
 	call do_int3
@@ -900,7 +1011,7 @@ ENTRY(overflow)
 	RING0_INT_FRAME
 	pushl $0
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_overflow
+	PUSH_XCODE(do_overflow)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -910,7 +1021,7 @@ ENTRY(bounds)
 	RING0_INT_FRAME
 	pushl $0
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_bounds
+	PUSH_XCODE(do_bounds)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -920,7 +1031,7 @@ ENTRY(invalid_op)
 	RING0_INT_FRAME
 	pushl $0
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_invalid_op
+	PUSH_XCODE(do_invalid_op)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -930,7 +1041,7 @@ ENTRY(coprocessor_segment_overrun)
 	RING0_INT_FRAME
 	pushl $0
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_coprocessor_segment_overrun
+ 	PUSH_XCODE(do_coprocessor_segment_overrun)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -938,7 +1049,7 @@ END(coprocessor_segment_overrun)
 
 ENTRY(invalid_TSS)
 	RING0_EC_FRAME
-	pushl $do_invalid_TSS
+ 	PUSH_XCODE(do_invalid_TSS)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -946,7 +1057,7 @@ END(invalid_TSS)
 
 ENTRY(segment_not_present)
 	RING0_EC_FRAME
-	pushl $do_segment_not_present
+ 	PUSH_XCODE(do_segment_not_present)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -954,7 +1065,7 @@ END(segment_not_present)
 
 ENTRY(stack_segment)
 	RING0_EC_FRAME
-	pushl $do_stack_segment
+ 	PUSH_XCODE(do_stack_segment)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -962,7 +1073,7 @@ END(stack_segment)
 
 KPROBE_ENTRY(general_protection)
 	RING0_EC_FRAME
-	pushl $do_general_protection
+ 	PUSH_XCODE(do_general_protection)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -970,7 +1081,7 @@ KPROBE_END(general_protection)
 
 ENTRY(alignment_check)
 	RING0_EC_FRAME
-	pushl $do_alignment_check
+	PUSH_XCODE(do_alignment_check)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -980,7 +1091,7 @@ ENTRY(divide_error)
 	RING0_INT_FRAME
 	pushl $0			# no error code
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_divide_error
+	PUSH_XCODE(do_divide_error)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -991,7 +1102,7 @@ ENTRY(machine_check)
 	RING0_INT_FRAME
 	pushl $0
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl machine_check_vector
+	PUSH_XVEC(machine_check_vector)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -1002,7 +1113,7 @@ ENTRY(spurious_interrupt_bug)
 	RING0_INT_FRAME
 	pushl $0
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_spurious_interrupt_bug
+	PUSH_XCODE(do_spurious_interrupt_bug)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
Index: linux-2.6.23/arch/i386/kernel/i8253.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/i8253.c
+++ linux-2.6.23/arch/i386/kernel/i8253.c
@@ -4,6 +4,7 @@
  */
 #include <linux/clockchips.h>
 #include <linux/init.h>
+#include <linux/ipipe.h>
 #include <linux/interrupt.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
@@ -15,7 +16,7 @@
 #include <asm/io.h>
 #include <asm/timer.h>
 
-DEFINE_SPINLOCK(i8253_lock);
+IPIPE_DEFINE_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
 
 /*
@@ -133,6 +134,12 @@ static cycle_t pit_read(void)
 	static int old_count;
 	static u32 old_jifs;
 
+#ifdef CONFIG_IPIPE
+	if (!__ipipe_pipeline_head_p(ipipe_root_domain))
+		/* We don't really own the PIT. */
+		return (cycle_t)(jiffies * LATCH) + (LATCH - 1) - old_count;
+#endif /* CONFIG_IPIPE */
+
 	spin_lock_irqsave(&i8253_lock, flags);
 	/*
 	 * Although our caller may have the read side of xtime_lock,
Index: linux-2.6.23/arch/i386/kernel/i8259.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/i8259.c
+++ linux-2.6.23/arch/i386/kernel/i8259.c
@@ -34,7 +34,7 @@
  */
 
 static int i8259A_auto_eoi;
-DEFINE_SPINLOCK(i8259A_lock);
+IPIPE_DEFINE_SPINLOCK(i8259A_lock);
 static void mask_and_ack_8259A(unsigned int);
 
 static struct irq_chip i8259A_chip = {
@@ -71,6 +71,7 @@ void disable_8259A_irq(unsigned int irq)
 	unsigned long flags;
 
 	spin_lock_irqsave(&i8259A_lock, flags);
+	ipipe_irq_lock(irq);
 	cached_irq_mask |= mask;
 	if (irq & 8)
 		outb(cached_slave_mask, PIC_SLAVE_IMR);
@@ -81,15 +82,18 @@ void disable_8259A_irq(unsigned int irq)
 
 void enable_8259A_irq(unsigned int irq)
 {
-	unsigned int mask = ~(1 << irq);
+	unsigned int mask = (1 << irq);
 	unsigned long flags;
 
 	spin_lock_irqsave(&i8259A_lock, flags);
-	cached_irq_mask &= mask;
-	if (irq & 8)
-		outb(cached_slave_mask, PIC_SLAVE_IMR);
-	else
-		outb(cached_master_mask, PIC_MASTER_IMR);
+	if (cached_irq_mask & mask) {
+		cached_irq_mask &= ~mask;
+		if (irq & 8)
+			outb(cached_slave_mask, PIC_SLAVE_IMR);
+		else
+			outb(cached_master_mask, PIC_MASTER_IMR);
+		ipipe_irq_unlock(irq);
+	}
 	spin_unlock_irqrestore(&i8259A_lock, flags);
 }
 
@@ -170,6 +174,15 @@ static void mask_and_ack_8259A(unsigned 
 	 */
 	if (cached_irq_mask & irqmask)
 		goto spurious_8259A_irq;
+#ifdef CONFIG_IPIPE
+	if (irq == 0) {
+	    /* Fast timer ack -- don't mask (unless supposedly
+	      spurious) */
+	    outb(0x60, PIC_MASTER_CMD);	/* Specific EOI to master. */
+	    spin_unlock_irqrestore(&i8259A_lock, flags);
+	    return;
+	}
+#endif /* CONFIG_IPIPE */
 	cached_irq_mask |= irqmask;
 
 handle_real_irq:
Index: linux-2.6.23/arch/i386/kernel/io_apic.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/io_apic.c
+++ linux-2.6.23/arch/i386/kernel/io_apic.c
@@ -56,8 +56,8 @@ atomic_t irq_mis_count;
 /* Where if anywhere is the i8259 connect in external int mode */
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
+static IPIPE_DEFINE_SPINLOCK(ioapic_lock);
+static IPIPE_DEFINE_SPINLOCK(vector_lock);
 
 int timer_over_8254 __initdata = 1;
 
@@ -278,6 +278,7 @@ static void mask_IO_APIC_irq (unsigned i
 	unsigned long flags;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
+	ipipe_irq_lock(irq);
 	__mask_IO_APIC_irq(irq);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
@@ -288,6 +289,7 @@ static void unmask_IO_APIC_irq (unsigned
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	__unmask_IO_APIC_irq(irq);
+	ipipe_irq_unlock(irq);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -729,8 +731,10 @@ late_initcall(balanced_irq_init);
 #ifndef CONFIG_SMP
 void fastcall send_IPI_self(int vector)
 {
+	unsigned long flags;
 	unsigned int cfg;
 
+	local_irq_save_hw_cond(flags);
 	/*
 	 * Wait for idle.
 	 */
@@ -740,6 +744,7 @@ void fastcall send_IPI_self(int vector)
 	 * Send the IPI. The write to APIC_ICR fires this off.
 	 */
 	apic_write_around(APIC_ICR, cfg);
+	local_irq_restore_hw_cond(flags);
 }
 #endif /* !CONFIG_SMP */
 
@@ -1939,6 +1944,7 @@ static unsigned int startup_ioapic_irq(u
 			was_pending = 1;
 	}
 	__unmask_IO_APIC_irq(irq);
+	ipipe_irq_unlock(irq);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return was_pending;
@@ -1946,8 +1952,10 @@ static unsigned int startup_ioapic_irq(u
 
 static void ack_ioapic_irq(unsigned int irq)
 {
+#ifndef CONFIG_IPIPE
 	move_native_irq(irq);
-	ack_APIC_irq();
+#endif /* CONFIG_IPIPE */
+	__ack_APIC_irq();
 }
 
 static void ack_ioapic_quirk_irq(unsigned int irq)
@@ -1955,7 +1963,9 @@ static void ack_ioapic_quirk_irq(unsigne
 	unsigned long v;
 	int i;
 
+#ifndef CONFIG_IPIPE
 	move_native_irq(irq);
+#endif /* CONFIG_IPIPE */
 /*
  * It appears there is an erratum which affects at least version 0x11
  * of I/O APIC (that's the 82093AA and cores integrated into various
@@ -1979,7 +1989,7 @@ static void ack_ioapic_quirk_irq(unsigne
 
 	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 
-	ack_APIC_irq();
+	__ack_APIC_irq();
 
 	if (!(v & (1 << (i & 0x1f)))) {
 		atomic_inc(&irq_mis_count);
@@ -1988,6 +1998,17 @@ static void ack_ioapic_quirk_irq(unsigne
 		__unmask_and_level_IO_APIC_irq(irq);
 		spin_unlock(&ioapic_lock);
 	}
+
+#ifdef CONFIG_IPIPE
+/*
+ * Prevent low priority IRQs grabbed by high priority domains from
+ * being delayed, waiting for a high priority interrupt handler
+ * running in a low priority domain to complete.
+ */
+	spin_lock(&ioapic_lock);
+	__mask_IO_APIC_irq(irq);
+	spin_unlock(&ioapic_lock);
+#endif
 }
 
 static int ioapic_retrigger_irq(unsigned int irq)
@@ -2049,23 +2070,29 @@ static inline void init_IO_APIC_traps(vo
 
 static void ack_apic(unsigned int irq)
 {
-	ack_APIC_irq();
+	__ack_APIC_irq();
 }
 
 static void mask_lapic_irq (unsigned int irq)
 {
-	unsigned long v;
+	unsigned long v, flags;
 
+	spin_lock_irqsave(&ioapic_lock, flags);
+	ipipe_irq_lock(irq);
 	v = apic_read(APIC_LVT0);
 	apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 static void unmask_lapic_irq (unsigned int irq)
 {
-	unsigned long v;
+	unsigned long v, flags;
 
+	spin_lock_irqsave(&ioapic_lock, flags);
 	v = apic_read(APIC_LVT0);
 	apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+	ipipe_irq_unlock(irq);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 static struct irq_chip lapic_chip __read_mostly = {
Index: linux-2.6.23/arch/i386/kernel/ipipe.c
===================================================================
--- /dev/null
+++ linux-2.6.23/arch/i386/kernel/ipipe.c
@@ -0,0 +1,817 @@
+/*   -*- linux-c -*-
+ *   linux/arch/i386/kernel/ipipe.c
+ *
+ *   Copyright (C) 2002-2007 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *   Architecture-dependent I-PIPE support for x86.
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/irq.h>
+#include <linux/clockchips.h>
+#include <asm/unistd.h>
+#include <asm/system.h>
+#include <asm/atomic.h>
+#include <asm/hw_irq.h>
+#include <asm/irq.h>
+#include <asm/desc.h>
+#include <asm/io.h>
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/tlbflush.h>
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <asm/mpspec.h>
+#ifdef CONFIG_X86_IO_APIC
+#include <asm/io_apic.h>
+#endif	/* CONFIG_X86_IO_APIC */
+#include <asm/apic.h>
+#include <mach_ipi.h>
+#endif	/* CONFIG_X86_LOCAL_APIC */
+
+extern struct clock_event_device *global_clock_event;
+
+extern struct clock_event_device pit_clockevent;
+
+int __ipipe_tick_irq;
+
+#ifdef CONFIG_SMP
+
+static cpumask_t __ipipe_cpu_sync_map;
+
+static cpumask_t __ipipe_cpu_lock_map;
+
+static IPIPE_DEFINE_SPINLOCK(__ipipe_cpu_barrier);
+
+static atomic_t __ipipe_critical_count = ATOMIC_INIT(0);
+
+static void (*__ipipe_cpu_sync) (void);
+
+#endif /* CONFIG_SMP */
+
+/* ipipe_trigger_irq() -- Push the interrupt at front of the pipeline
+   just like if it has been actually received from a hw source. Also
+   works for virtual interrupts. */
+
+int fastcall ipipe_trigger_irq(unsigned irq)
+{
+	struct pt_regs regs;
+	unsigned long flags;
+
+	if (irq >= IPIPE_NR_IRQS ||
+	    (ipipe_virtual_irq_p(irq) &&
+	     !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)))
+		return -EINVAL;
+
+	local_irq_save_hw(flags);
+
+	regs.orig_eax = irq;	/* Won't be acked */
+	regs.xcs = __KERNEL_CS;
+	regs.eflags = flags;
+
+	__ipipe_handle_irq(regs);
+
+	local_irq_restore_hw(flags);
+
+	return 1;
+}
+
+int ipipe_get_sysinfo(struct ipipe_sysinfo *info)
+{
+	info->ncpus = num_online_cpus();
+	info->cpufreq = ipipe_cpu_freq();
+	info->archdep.tmirq = __ipipe_tick_irq;
+#ifdef CONFIG_X86_TSC
+	info->archdep.tmfreq = ipipe_cpu_freq();
+#else	/* !CONFIG_X86_TSC */
+	info->archdep.tmfreq = CLOCK_TICK_RATE;
+#endif	/* CONFIG_X86_TSC */
+
+	return 0;
+}
+
+fastcall unsigned int do_IRQ(struct pt_regs *regs);
+fastcall void smp_apic_timer_interrupt(struct pt_regs *regs);
+fastcall void smp_spurious_interrupt(struct pt_regs *regs);
+fastcall void smp_error_interrupt(struct pt_regs *regs);
+fastcall void smp_thermal_interrupt(struct pt_regs *regs);
+fastcall void smp_reschedule_interrupt(struct pt_regs *regs);
+fastcall void smp_invalidate_interrupt(struct pt_regs *regs);
+fastcall void smp_call_function_interrupt(struct pt_regs *regs);
+
+static int __ipipe_ack_irq(unsigned irq)
+{
+	irq_desc_t *desc = irq_desc + irq;
+	desc->ipipe_ack(irq, desc);
+	return 1;
+}
+
+void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq)
+{
+	irq_desc[irq].status &= ~IRQ_DISABLED;
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+static int __ipipe_noack_apic(unsigned irq)
+{
+	return 1;
+}
+
+int __ipipe_ack_apic(unsigned irq)
+{
+	__ack_APIC_irq();
+	return 1;
+}
+
+static void __ipipe_null_handler(unsigned irq, void *cookie)
+{
+}
+
+#endif	/* CONFIG_X86_LOCAL_APIC */
+
+/* __ipipe_enable_pipeline() -- We are running on the boot CPU, hw
+   interrupts are off, and secondary CPUs are still lost in space. */
+
+void __init __ipipe_enable_pipeline(void)
+{
+	unsigned irq;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+	/* Map the APIC system vectors. */
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR),
+			     (ipipe_irq_handler_t)&smp_apic_timer_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(SPURIOUS_APIC_VECTOR),
+			     (ipipe_irq_handler_t)&smp_spurious_interrupt,
+			     NULL,
+			     &__ipipe_noack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(ERROR_APIC_VECTOR),
+			     (ipipe_irq_handler_t)&smp_error_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0),
+			     &__ipipe_null_handler,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1),
+			     &__ipipe_null_handler,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2),
+			     &__ipipe_null_handler,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3),
+			     &__ipipe_null_handler,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+#ifdef CONFIG_X86_MCE_P4THERMAL
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(THERMAL_APIC_VECTOR),
+			     (ipipe_irq_handler_t)&smp_thermal_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+#endif /* CONFIG_X86_MCE_P4THERMAL */
+
+	__ipipe_tick_irq = global_clock_event == &pit_clockevent ? 0
+		: ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR);
+
+#else	/* !CONFIG_X86_LOCAL_APIC */
+
+	__ipipe_tick_irq = 0;
+
+#endif	/* CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_SMP
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(RESCHEDULE_VECTOR),
+			     (ipipe_irq_handler_t)&smp_reschedule_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(INVALIDATE_TLB_VECTOR),
+			     (ipipe_irq_handler_t)&smp_invalidate_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(CALL_FUNCTION_VECTOR),
+			     (ipipe_irq_handler_t)&smp_call_function_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+#endif	/* CONFIG_SMP */
+
+	/* Finally, virtualize the remaining ISA and IO-APIC
+	 * interrupts. Interrupts which have already been virtualized
+	 * will just beget a silent -EPERM error since
+	 * IPIPE_SYSTEM_MASK has been passed for them, that's ok. */
+
+	for (irq = 0; irq < NR_IRQS; irq++)
+		/* Fails for IPIPE_CRITICAL_IPI but that's ok. */
+		ipipe_virtualize_irq(ipipe_root_domain,
+				     irq,
+				     (ipipe_irq_handler_t)&do_IRQ,
+				     NULL,
+				     &__ipipe_ack_irq,
+				     IPIPE_STDROOT_MASK);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	/* Eventually allow these vectors to be reprogrammed. */
+	ipipe_root_domain->irqs[IPIPE_SERVICE_IPI0].control &= ~IPIPE_SYSTEM_MASK;
+	ipipe_root_domain->irqs[IPIPE_SERVICE_IPI1].control &= ~IPIPE_SYSTEM_MASK;
+	ipipe_root_domain->irqs[IPIPE_SERVICE_IPI2].control &= ~IPIPE_SYSTEM_MASK;
+	ipipe_root_domain->irqs[IPIPE_SERVICE_IPI3].control &= ~IPIPE_SYSTEM_MASK;
+#endif	/* CONFIG_X86_LOCAL_APIC */
+}
+
+#ifdef CONFIG_SMP
+
+cpumask_t __ipipe_set_irq_affinity (unsigned irq, cpumask_t cpumask)
+{
+	cpumask_t oldmask = irq_desc[irq].affinity;
+
+	if (irq_desc[irq].chip->set_affinity == NULL)
+		return CPU_MASK_NONE;
+
+	if (cpus_empty(cpumask))
+		return oldmask; /* Return mask value -- no change. */
+
+	cpus_and(cpumask,cpumask,cpu_online_map);
+
+	if (cpus_empty(cpumask))
+		return CPU_MASK_NONE;	/* Error -- bad mask value or non-routable IRQ. */
+
+	irq_desc[irq].chip->set_affinity(irq,cpumask);
+
+	return oldmask;
+}
+
+int fastcall __ipipe_send_ipi (unsigned ipi, cpumask_t cpumask)
+{
+	unsigned long flags;
+	int self;
+
+	if (ipi != IPIPE_SERVICE_IPI0 &&
+	    ipi != IPIPE_SERVICE_IPI1 &&
+	    ipi != IPIPE_SERVICE_IPI2 &&
+	    ipi != IPIPE_SERVICE_IPI3)
+		return -EINVAL;
+
+	local_irq_save_hw(flags);
+
+	self = cpu_isset(ipipe_processor_id(),cpumask);
+	cpu_clear(ipipe_processor_id(), cpumask);
+
+	if (!cpus_empty(cpumask))
+		send_IPI_mask(cpumask,ipipe_apic_irq_vector(ipi));
+
+	if (self)
+		ipipe_trigger_irq(ipi);
+
+	local_irq_restore_hw(flags);
+
+	return 0;
+}
+
+/* Always called with hw interrupts off. */
+
+void __ipipe_do_critical_sync(unsigned irq, void *cookie)
+{
+	int cpu = ipipe_processor_id();
+
+	cpu_set(cpu, __ipipe_cpu_sync_map);
+
+	/* Now we are in sync with the lock requestor running on another
+	   CPU. Enter a spinning wait until he releases the global
+	   lock. */
+	spin_lock(&__ipipe_cpu_barrier);
+
+	/* Got it. Now get out. */
+
+	if (__ipipe_cpu_sync)
+		/* Call the sync routine if any. */
+		__ipipe_cpu_sync();
+
+	spin_unlock(&__ipipe_cpu_barrier);
+
+	cpu_clear(cpu, __ipipe_cpu_sync_map);
+}
+
+void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd)
+{
+	ipd->irqs[IPIPE_CRITICAL_IPI].acknowledge = &__ipipe_ack_apic;
+	ipd->irqs[IPIPE_CRITICAL_IPI].handler = &__ipipe_do_critical_sync;
+	ipd->irqs[IPIPE_CRITICAL_IPI].cookie = NULL;
+	/* Immediately handle in the current domain but *never* pass */
+	ipd->irqs[IPIPE_CRITICAL_IPI].control =
+		IPIPE_HANDLE_MASK|IPIPE_STICKY_MASK|IPIPE_SYSTEM_MASK;
+}
+
+#endif	/* CONFIG_SMP */
+
+/* ipipe_critical_enter() -- Grab the superlock excluding all CPUs
+   but the current one from a critical section. This lock is used when
+   we must enforce a global critical section for a single CPU in a
+   possibly SMP system whichever context the CPUs are running. */
+
+unsigned long ipipe_critical_enter(void (*syncfn) (void))
+{
+	unsigned long flags;
+
+	local_irq_save_hw(flags);
+
+#ifdef CONFIG_SMP
+	if (unlikely(num_online_cpus() == 1))	/* We might be running a SMP-kernel on a UP box... */
+		return flags;
+
+	{
+		int cpu = ipipe_processor_id();
+		cpumask_t lock_map;
+
+		if (!cpu_test_and_set(cpu, __ipipe_cpu_lock_map)) {
+			while (cpu_test_and_set(BITS_PER_LONG - 1, __ipipe_cpu_lock_map)) {
+				int n = 0;
+				do {
+					cpu_relax();
+				} while (++n < cpu);
+			}
+
+			spin_lock(&__ipipe_cpu_barrier);
+
+			__ipipe_cpu_sync = syncfn;
+
+			/* Send the sync IPI to all processors but the current one. */
+			send_IPI_allbutself(IPIPE_CRITICAL_VECTOR);
+
+			cpus_andnot(lock_map, cpu_online_map, __ipipe_cpu_lock_map);
+
+			while (!cpus_equal(__ipipe_cpu_sync_map, lock_map))
+				cpu_relax();
+		}
+
+		atomic_inc(&__ipipe_critical_count);
+	}
+#endif	/* CONFIG_SMP */
+
+	return flags;
+}
+
+/* ipipe_critical_exit() -- Release the superlock. */
+
+void ipipe_critical_exit(unsigned long flags)
+{
+#ifdef CONFIG_SMP
+	if (num_online_cpus() == 1)
+		goto out;
+
+	if (atomic_dec_and_test(&__ipipe_critical_count)) {
+		spin_unlock(&__ipipe_cpu_barrier);
+
+		while (!cpus_empty(__ipipe_cpu_sync_map))
+			cpu_relax();
+
+		cpu_clear(ipipe_processor_id(), __ipipe_cpu_lock_map);
+		cpu_clear(BITS_PER_LONG - 1, __ipipe_cpu_lock_map);
+	}
+out:
+#endif	/* CONFIG_SMP */
+
+	local_irq_restore_hw(flags);
+}
+
+static inline void __fixup_if(struct pt_regs *regs)
+{
+	if (!ipipe_root_domain_p)
+		return;
+
+	/*
+	 * Have the saved hw state look like the domain stall bit, so
+	 * that __ipipe_unstall_iret_root() restores the proper
+	 * pipeline state for the root stage upon exit.
+	 */
+
+	if (test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
+		regs->eflags &= ~X86_EFLAGS_IF;
+	else
+		regs->eflags |= X86_EFLAGS_IF;
+}
+
+/*  Check the stall bit of the root domain to make sure the existing
+    preemption opportunity upon in-kernel resumption could be
+    exploited. In case a rescheduling could take place, the root stage
+    is stalled before the hw interrupts are re-enabled. This routine
+    must be called with hw interrupts off. */
+
+asmlinkage int __ipipe_kpreempt_root(struct pt_regs regs)
+{
+	if (test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
+		/* Root stage is stalled: rescheduling denied. */
+		return 0;
+
+	__ipipe_stall_root();
+	local_irq_enable_hw_notrace();
+
+	return 1;	/* Ok, may reschedule now. */
+}
+
+asmlinkage void __ipipe_unstall_iret_root(struct pt_regs regs)
+{
+	/* Emulate IRET's handling of the interrupt flag. */
+
+	local_irq_disable_hw();
+
+	/* Restore the software state as it used to be on kernel
+	   entry. CAUTION: NMIs must *not* return through this
+	   emulation. */
+
+	if (!(regs.eflags & X86_EFLAGS_IF)) {
+		if (!__test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
+			trace_hardirqs_off();
+		regs.eflags |= X86_EFLAGS_IF;
+	} else {
+		if (test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status))) {
+			trace_hardirqs_on();
+			__clear_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status));
+		}
+
+		/* Only sync virtual IRQs here, so that we don't recurse
+		   indefinitely in case of an external interrupt flood. */
+
+		if ((ipipe_root_cpudom_var(irqpend_himask) & IPIPE_IRQMASK_VIRT) != 0)
+			__ipipe_sync_pipeline(IPIPE_IRQMASK_VIRT);
+	}
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+	ipipe_trace_end(0x8000000D);
+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
+}
+
+asmlinkage int __ipipe_syscall_root(struct pt_regs regs)
+{
+	unsigned long flags;
+
+	__fixup_if(&regs);
+
+	/* This routine either returns:
+	    0 -- if the syscall is to be passed to Linux;
+	   >0 -- if the syscall should not be passed to Linux, and no
+	   tail work should be performed;
+	   <0 -- if the syscall should not be passed to Linux but the
+	   tail work has to be performed (for handling signals etc). */
+
+	if (__ipipe_syscall_watched_p(current, regs.orig_eax) &&
+	    __ipipe_event_monitored_p(IPIPE_EVENT_SYSCALL) &&
+	    __ipipe_dispatch_event(IPIPE_EVENT_SYSCALL,&regs) > 0) {
+		/* We might enter here over a non-root domain and exit
+		 * over the root one as a result of the syscall
+		 * (i.e. by recycling the register set of the current
+		 * context across the migration), so we need to fixup
+		 * the interrupt flag upon return too, so that
+		 * __ipipe_unstall_iret_root() resets the correct
+		 * stall bit on exit. */
+		__fixup_if(&regs);
+
+		if (ipipe_root_domain_p && !in_atomic()) {
+			/* Sync pending VIRQs before _TIF_NEED_RESCHED is tested. */
+			local_irq_save_hw(flags);
+			if ((ipipe_root_cpudom_var(irqpend_himask) & IPIPE_IRQMASK_VIRT) != 0)
+				__ipipe_sync_pipeline(IPIPE_IRQMASK_VIRT);
+			local_irq_restore_hw(flags);
+			return -1;
+		}
+		return 1;
+	}
+
+    return 0;
+}
+
+static fastcall void do_machine_check_vector(struct pt_regs *regs, long error_code)
+{
+#ifdef CONFIG_X86_MCE
+	extern fastcall void (*machine_check_vector)(struct pt_regs *, long);
+	machine_check_vector(regs,error_code);
+#endif /* CONFIG_X86_MCE */
+}
+
+fastcall void do_divide_error(struct pt_regs *regs, long error_code);
+fastcall void do_overflow(struct pt_regs *regs, long error_code);
+fastcall void do_bounds(struct pt_regs *regs, long error_code);
+fastcall void do_invalid_op(struct pt_regs *regs, long error_code);
+fastcall void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code);
+fastcall void do_invalid_TSS(struct pt_regs *regs, long error_code);
+fastcall void do_segment_not_present(struct pt_regs *regs, long error_code);
+fastcall void do_stack_segment(struct pt_regs *regs, long error_code);
+fastcall void do_general_protection(struct pt_regs *regs, long error_code);
+fastcall void do_page_fault(struct pt_regs *regs, long error_code);
+fastcall void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code);
+fastcall void do_coprocessor_error(struct pt_regs *regs, long error_code);
+fastcall void do_alignment_check(struct pt_regs *regs, long error_code);
+fastcall void do_simd_coprocessor_error(struct pt_regs *regs, long error_code);
+fastcall void do_iret_error(struct pt_regs *regs, long error_code);
+
+/* Work around genksyms's issue with over-qualification in decls. */
+
+typedef fastcall void __ipipe_exhandler(struct pt_regs *, long);
+
+typedef __ipipe_exhandler *__ipipe_exptr;
+
+static __ipipe_exptr __ipipe_std_extable[] = {
+
+	[ex_do_divide_error] = &do_divide_error,
+	[ex_do_overflow] = &do_overflow,
+	[ex_do_bounds] = &do_bounds,
+	[ex_do_invalid_op] = &do_invalid_op,
+	[ex_do_coprocessor_segment_overrun] = &do_coprocessor_segment_overrun,
+	[ex_do_invalid_TSS] = &do_invalid_TSS,
+	[ex_do_segment_not_present] = &do_segment_not_present,
+	[ex_do_stack_segment] = &do_stack_segment,
+	[ex_do_general_protection] = do_general_protection,
+	[ex_do_page_fault] = &do_page_fault,
+	[ex_do_spurious_interrupt_bug] = &do_spurious_interrupt_bug,
+	[ex_do_coprocessor_error] = &do_coprocessor_error,
+	[ex_do_alignment_check] = &do_alignment_check,
+	[ex_machine_check_vector] = &do_machine_check_vector,
+	[ex_do_simd_coprocessor_error] = &do_simd_coprocessor_error,
+	[ex_do_iret_error] = &do_iret_error,
+};
+
+#ifdef CONFIG_KGDB
+#include <linux/kgdb.h>
+
+static int __ipipe_xlate_signo[] = {
+
+	[ex_do_divide_error] = SIGFPE,
+	[ex_do_debug] = SIGTRAP,
+	[2] = -1,
+	[ex_do_int3] = SIGTRAP,
+	[ex_do_overflow] = SIGSEGV,
+	[ex_do_bounds] = SIGSEGV,
+	[ex_do_invalid_op] = SIGILL,
+	[ex_device_not_available] = -1,
+	[8] = -1,
+	[ex_do_coprocessor_segment_overrun] = SIGFPE,
+	[ex_do_invalid_TSS] = SIGSEGV,
+	[ex_do_segment_not_present] = SIGBUS,
+	[ex_do_stack_segment] = SIGBUS,
+	[ex_do_general_protection] = SIGSEGV,
+	[ex_do_page_fault] = SIGSEGV,
+	[ex_do_spurious_interrupt_bug] = -1,
+	[ex_do_coprocessor_error] = -1,
+	[ex_do_alignment_check] = SIGBUS,
+	[ex_machine_check_vector] = -1,
+	[ex_do_simd_coprocessor_error] = -1,
+	[20 ... 31] = -1,
+	[ex_do_iret_error] = SIGSEGV,
+};
+#endif /* CONFIG_KGDB */
+
+fastcall int __ipipe_handle_exception(struct pt_regs *regs, long error_code, int vector)
+{
+	unsigned long flags;
+
+	local_save_flags(flags);
+
+	/* Track the hw interrupt state before calling the Linux
+	 * exception handler, replicating it into the virtual mask. */
+
+	if (irqs_disabled_hw()) {
+		/* Do not trigger the alarm in ipipe_check_context() by using
+		 * plain local_irq_disable(). */
+		__ipipe_stall_root();
+		trace_hardirqs_off();
+		barrier();
+	}
+
+#ifdef CONFIG_KGDB
+	/* catch exception KGDB is interested in over non-root domains */
+	if (!ipipe_root_domain_p &&
+	    __ipipe_xlate_signo[vector] >= 0 &&
+	    !kgdb_handle_exception(vector, __ipipe_xlate_signo[vector], error_code, regs)) {
+		local_irq_restore(flags);
+		return 1;
+	}
+#endif /* CONFIG_KGDB */
+
+	if (!ipipe_trap_notify(vector, regs)) {
+		__ipipe_exptr handler = __ipipe_std_extable[vector];
+		handler(regs,error_code);
+		local_irq_restore(flags);
+		__fixup_if(regs);
+		return 0;
+	}
+
+	local_irq_restore(flags);
+
+	return 1;
+}
+
+fastcall int __ipipe_divert_exception(struct pt_regs *regs, int vector)
+{
+#ifdef CONFIG_KGDB
+	/* catch int1 and int3 over non-root domains */
+	if (!ipipe_root_domain_p && vector != ex_device_not_available) {
+		unsigned int condition = 0;
+		if (vector == 1)
+			get_debugreg(condition, 6);
+		if (!kgdb_handle_exception(vector, SIGTRAP, condition, regs))
+			return 1;
+	}
+#endif /* CONFIG_KGDB */
+
+	if (ipipe_trap_notify(vector, regs))
+		return 1;
+
+	__fixup_if(regs);
+
+	return 0;
+}
+
+/* __ipipe_handle_irq() -- IPIPE's generic IRQ handler. An optimistic
+   interrupt protection log is maintained here for each domain.  Hw
+   interrupts are off on entry. */
+
+int __ipipe_handle_irq(struct pt_regs regs)
+{
+	struct ipipe_domain *this_domain, *next_domain;
+	unsigned irq = regs.orig_eax;
+	struct list_head *head, *pos;
+	int m_ack;
+
+	if ((long)regs.orig_eax < 0) {
+		irq = ~irq;
+		m_ack = 0;
+	} else /* This is a self-triggered interrupt. */
+		m_ack = 1;
+
+	head = __ipipe_pipeline.next;
+	next_domain = list_entry(head, struct ipipe_domain, p_link);
+	if (likely(test_bit(IPIPE_WIRED_FLAG, &next_domain->irqs[irq].control))) {
+		if (!m_ack && next_domain->irqs[irq].acknowledge != NULL)
+			next_domain->irqs[irq].acknowledge(irq);
+		if (likely(__ipipe_dispatch_wired(next_domain, irq))) {
+			goto finalize;
+		} else
+			goto finalize_nosync;
+	}
+
+	this_domain = ipipe_current_domain;
+
+	if (test_bit(IPIPE_STICKY_FLAG, &this_domain->irqs[irq].control))
+		head = &this_domain->p_link;
+
+	/* Ack the interrupt. */
+
+	pos = head;
+
+	while (pos != &__ipipe_pipeline) {
+		next_domain = list_entry(pos, struct ipipe_domain, p_link);
+
+		/*
+		 * For each domain handling the incoming IRQ, mark it
+		 * as pending in its log.
+		 */
+		if (test_bit(IPIPE_HANDLE_FLAG, &next_domain->irqs[irq].control)) {
+			/*
+			 * Domains that handle this IRQ are polled for
+			 * acknowledging it by decreasing priority
+			 * order. The interrupt must be made pending
+			 * _first_ in the domain's status flags before
+			 * the PIC is unlocked.
+			 */
+			__ipipe_set_irq_pending(next_domain, irq);
+
+			if (!m_ack && next_domain->irqs[irq].acknowledge != NULL)
+				m_ack = next_domain->irqs[irq].acknowledge(irq);
+		}
+
+		/*
+		 * If the domain does not want the IRQ to be passed
+		 * down the interrupt pipe, exit the loop now.
+		 */
+
+		if (!test_bit(IPIPE_PASS_FLAG, &next_domain->irqs[irq].control))
+			break;
+
+		pos = next_domain->p_link.next;
+	}
+
+finalize:
+
+	/* Given our deferred dispatching model for regular IRQs, we
+	 * only record CPU regs for the last timer interrupt, so that
+	 * the timer handler charges CPU times properly. It is assumed
+	 * that other interrupt handlers don't actually care for such
+	 * information. */
+
+	if (irq == __ipipe_tick_irq)
+		set_irq_regs(&regs);
+
+	/*
+	 * Now walk the pipeline, yielding control to the highest
+	 * priority domain that has pending interrupt(s) or
+	 * immediately to the current domain if the interrupt has been
+	 * marked as 'sticky'. This search does not go beyond the
+	 * current domain in the pipeline.
+	 */
+
+	__ipipe_walk_pipeline(head);
+
+finalize_nosync:
+
+	if (!ipipe_root_domain_p ||
+	    test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
+		return 0;
+
+#ifdef CONFIG_SMP
+	/*
+	 * Prevent a spurious rescheduling from being triggered on
+	 * preemptible kernels along the way out through
+	 * ret_from_intr.
+	 */
+	if ((long)regs.orig_eax < 0)
+		__set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status));
+#endif	/* CONFIG_SMP */
+
+	return 1;
+}
+
+int __ipipe_check_tickdev(const char *devname)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	if (strcmp(devname, "lapic"))
+		return __ipipe_check_lapic();
+#endif
+
+	return 1;
+}
+
+EXPORT_SYMBOL(__ipipe_tick_irq);
+EXPORT_SYMBOL(ipipe_critical_enter);
+EXPORT_SYMBOL(ipipe_critical_exit);
+EXPORT_SYMBOL(ipipe_trigger_irq);
+EXPORT_SYMBOL(ipipe_get_sysinfo);
+
+EXPORT_SYMBOL_GPL(irq_desc);
+EXPORT_SYMBOL_GPL(__switch_to);
+EXPORT_SYMBOL_GPL(show_stack);
+EXPORT_PER_CPU_SYMBOL_GPL(init_tss);
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+EXPORT_SYMBOL(tasklist_lock);
+#endif /* CONFIG_SMP || CONFIG_DEBUG_SPINLOCK */
+#ifdef CONFIG_SMP
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_tlbstate);
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_IPIPE_TRACE_MCOUNT
+void notrace mcount(void);
+EXPORT_SYMBOL(mcount);
+#endif /* CONFIG_IPIPE_TRACE_MCOUNT */
Index: linux-2.6.23/arch/i386/kernel/mcount.S
===================================================================
--- /dev/null
+++ linux-2.6.23/arch/i386/kernel/mcount.S
@@ -0,0 +1,32 @@
+/*
+ *  linux/arch/i386/mcount.S
+ *
+ *  Copyright (C) 2005, 2007 Jan Kiszka
+ */
+
+.globl mcount
+mcount:
+	cmpl $0,ipipe_trace_enable
+	je out
+
+	pushl %ebp
+	movl %esp,%ebp
+
+	pushl %eax
+	pushl %ecx
+	pushl %edx
+
+	pushl $0		# no additional value (v)
+	movl (%ebp),%eax
+	movl 0x4(%ebp),%edx	# __CALLER_ADDR0
+	movl 0x4(%eax),%ecx	# __CALLER_ADDR1
+	movl $0,%eax		# IPIPE_TRACE_FUNC
+	call __ipipe_trace
+	popl %eax
+
+	popl %edx
+	popl %ecx
+	popl %eax
+	popl %ebp
+out:
+	ret
Index: linux-2.6.23/arch/i386/kernel/nmi.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/nmi.c
+++ linux-2.6.23/arch/i386/kernel/nmi.c
@@ -48,6 +48,10 @@ static unsigned int nmi_hz = HZ;
 
 static DEFINE_PER_CPU(short, wd_enabled);
 
+static int default_nmi_watchdog_tick(struct pt_regs * regs, unsigned reason);
+int (*nmi_watchdog_tick) (struct pt_regs * regs, unsigned reason) = &default_nmi_watchdog_tick;
+EXPORT_SYMBOL(nmi_watchdog_tick);
+
 /* local prototypes */
 static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
 
@@ -317,9 +321,7 @@ void touch_nmi_watchdog(void)
 }
 EXPORT_SYMBOL(touch_nmi_watchdog);
 
-extern void die_nmi(struct pt_regs *, const char *msg);
-
-__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
+static __kprobes int default_nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
 {
 
 	/*
Index: linux-2.6.23/arch/i386/kernel/process.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/process.c
+++ linux-2.6.23/arch/i386/kernel/process.c
@@ -197,6 +197,7 @@ void cpu_idle(void)
 				play_dead();
 
 			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
+ 			ipipe_suspend_domain();
 			idle();
 		}
 		tick_nohz_restart_sched_tick();
Index: linux-2.6.23/arch/i386/kernel/smp.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/smp.c
+++ linux-2.6.23/arch/i386/kernel/smp.c
@@ -142,6 +142,9 @@ void __send_IPI_shortcut(unsigned int sh
 	 * to the APIC.
 	 */
 	unsigned int cfg;
+	unsigned long flags;
+
+	local_irq_save_hw_cond(flags);
 
 	/*
 	 * Wait for idle.
@@ -157,6 +160,8 @@ void __send_IPI_shortcut(unsigned int sh
 	 * Send the IPI. The write to APIC_ICR fires this off.
 	 */
 	apic_write_around(APIC_ICR, cfg);
+
+	local_irq_restore_hw_cond(flags);
 }
 
 void fastcall send_IPI_self(int vector)
@@ -205,10 +210,10 @@ void send_IPI_mask_bitmask(cpumask_t cpu
 	unsigned long mask = cpus_addr(cpumask)[0];
 	unsigned long flags;
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
 	__send_IPI_dest_field(mask, vector);
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 void send_IPI_mask_sequence(cpumask_t mask, int vector)
@@ -222,14 +227,14 @@ void send_IPI_mask_sequence(cpumask_t ma
 	 * should be modified to do 1 message per cluster ID - mbligh
 	 */ 
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
 		if (cpu_isset(query_cpu, mask)) {
 			__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
 					      vector);
 		}
 	}
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 #include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
@@ -312,7 +317,9 @@ void leave_mm(unsigned long cpu)
 
 fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
 {
-	unsigned long cpu;
+	unsigned long cpu, flags;
+
+  	local_irq_save_hw_cond(flags);
 
 	cpu = get_cpu();
 
@@ -342,6 +349,7 @@ fastcall void smp_invalidate_interrupt(s
 	smp_mb__after_clear_bit();
 out:
 	put_cpu_no_resched();
+  	local_irq_restore_hw_cond(flags);
 }
 
 void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
@@ -395,14 +403,17 @@ void flush_tlb_current_task(void)
 {
 	struct mm_struct *mm = current->mm;
 	cpumask_t cpu_mask;
+ 	unsigned long flags;
 
 	preempt_disable();
+	local_irq_save_hw_cond(flags);
 	cpu_mask = mm->cpu_vm_mask;
 	cpu_clear(smp_processor_id(), cpu_mask);
 
 	local_flush_tlb();
 	if (!cpus_empty(cpu_mask))
 		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+ 	local_irq_restore_hw_cond(flags);
 	preempt_enable();
 }
 
@@ -430,8 +441,11 @@ void flush_tlb_page(struct vm_area_struc
 {
 	struct mm_struct *mm = vma->vm_mm;
 	cpumask_t cpu_mask;
+	unsigned long flags;
 
 	preempt_disable();
+	local_irq_save_hw_cond(flags);
+
 	cpu_mask = mm->cpu_vm_mask;
 	cpu_clear(smp_processor_id(), cpu_mask);
 
@@ -442,6 +456,8 @@ void flush_tlb_page(struct vm_area_struc
 		 	leave_mm(smp_processor_id());
 	}
 
+	local_irq_restore_hw_cond(flags);
+
 	if (!cpus_empty(cpu_mask))
 		flush_tlb_others(cpu_mask, mm, va);
 
@@ -603,7 +619,7 @@ native_smp_call_function_mask(cpumask_t 
 
 static void stop_this_cpu (void * dummy)
 {
-	local_irq_disable();
+	local_irq_disable_hw();
 	/*
 	 * Remove this CPU:
 	 */
Index: linux-2.6.23/arch/i386/kernel/smpboot.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/smpboot.c
+++ linux-2.6.23/arch/i386/kernel/smpboot.c
@@ -1306,6 +1306,11 @@ void __init smp_intr_init(void)
 
 	/* IPI for generic function call */
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+
+#ifdef CONFIG_IPIPE
+	/* IPI for critical lock */
+	set_intr_gate(IPIPE_CRITICAL_VECTOR, ipipe_ipiX);
+#endif
 }
 
 /*
Index: linux-2.6.23/arch/i386/kernel/time.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/time.c
+++ linux-2.6.23/arch/i386/kernel/time.c
@@ -165,11 +165,12 @@ irqreturn_t timer_interrupt(int irq, voi
 		 * This will also deassert NMI lines for the watchdog if run
 		 * on an 82489DX-based system.
 		 */
-		spin_lock(&i8259A_lock);
+		unsigned long flags;
+		spin_lock_irqsave_cond(&i8259A_lock,flags);
 		outb(0x0c, PIC_MASTER_OCW3);
 		/* Ack the IRQ; AEOI will end it automatically. */
 		inb(PIC_MASTER_POLL);
-		spin_unlock(&i8259A_lock);
+		spin_unlock_irqrestore_cond(&i8259A_lock,flags);
 	}
 #endif
 
Index: linux-2.6.23/arch/i386/kernel/traps.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/traps.c
+++ linux-2.6.23/arch/i386/kernel/traps.c
@@ -320,6 +320,9 @@ void show_registers(struct pt_regs *regs
 	printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
 		TASK_COMM_LEN, current->comm, current->pid,
 		current_thread_info(), current, task_thread_info(current));
+#ifdef CONFIG_IPIPE
+	printk(KERN_EMERG "\nI-pipe domain %s", ipipe_current_domain->name);
+#endif /* CONFIG_IPIPE */
 	/*
 	 * When in-kernel, we also print out the stack and code at the
 	 * time of the fault..
@@ -746,6 +749,8 @@ void __kprobes die_nmi(struct pt_regs *r
 	do_exit(SIGSEGV);
 }
 
+EXPORT_SYMBOL(die_nmi);
+
 static __kprobes void default_do_nmi(struct pt_regs * regs)
 {
 	unsigned char reason = 0;
@@ -788,17 +793,21 @@ static int ignore_nmis;
 
 fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
 {
-	int cpu;
+	int cpu, cs;
 
 	nmi_enter();
 
 	cpu = smp_processor_id();
 
+	cs = ipipe_disable_context_check(cpu);
+
 	++nmi_count(cpu);
 
 	if (!ignore_nmis)
 		default_do_nmi(regs);
 
+	ipipe_restore_context_check(cpu, cs);
+
 	nmi_exit();
 }
 
@@ -1092,13 +1101,16 @@ asmlinkage void math_state_restore(void)
 {
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = thread->task;
+	unsigned long flags;
 
+	local_irq_save_hw_cond(flags);
 	clts();		/* Allow maths ops (or we recurse) */
 	if (!tsk_used_math(tsk))
 		init_fpu(tsk);
 	restore_fpu(tsk);
 	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
 	tsk->fpu_counter++;
+	local_irq_restore_hw_cond(flags);
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
 
Index: linux-2.6.23/arch/i386/kernel/vm86.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/vm86.c
+++ linux-2.6.23/arch/i386/kernel/vm86.c
@@ -148,12 +148,14 @@ struct pt_regs * fastcall save_v86_state
 		do_exit(SIGSEGV);
 	}
 
+	local_irq_disable_hw_cond();
 	tss = &per_cpu(init_tss, get_cpu());
 	current->thread.esp0 = current->thread.saved_esp0;
 	current->thread.sysenter_cs = __KERNEL_CS;
 	load_esp0(tss, &current->thread);
 	current->thread.saved_esp0 = 0;
 	put_cpu();
+	local_irq_enable_hw_cond();
 
 	ret = KVM86->regs32;
 
@@ -324,12 +326,14 @@ static void do_sys_vm86(struct kernel_vm
 	tsk->thread.saved_fs = info->regs32->xfs;
 	savesegment(gs, tsk->thread.saved_gs);
 
+	local_irq_disable_hw_cond();
 	tss = &per_cpu(init_tss, get_cpu());
 	tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
 	if (cpu_has_sep)
 		tsk->thread.sysenter_cs = 0;
 	load_esp0(tss, &tsk->thread);
 	put_cpu();
+	local_irq_enable_hw_cond();
 
 	tsk->thread.screen_bitmap = info->screen_bitmap;
 	if (info->flags & VM86_SCREEN_BITMAP)
Index: linux-2.6.23/arch/i386/lib/mmx.c
===================================================================
--- linux-2.6.23.orig/arch/i386/lib/mmx.c
+++ linux-2.6.23/arch/i386/lib/mmx.c
@@ -31,7 +31,7 @@ void *_mmx_memcpy(void *to, const void *
 	void *p;
 	int i;
 
-	if (unlikely(in_interrupt()))
+	if (unlikely(!ipipe_root_domain_p || in_interrupt()))
 		return __memcpy(to, from, len);
 
 	p = to;
Index: linux-2.6.23/arch/i386/mach-visws/visws_apic.c
===================================================================
--- linux-2.6.23.orig/arch/i386/mach-visws/visws_apic.c
+++ linux-2.6.23/arch/i386/mach-visws/visws_apic.c
@@ -28,7 +28,7 @@
 #include "irq_vectors.h"
 
 
-static DEFINE_SPINLOCK(cobalt_lock);
+static IPIPE_DEFINE_SPINLOCK(cobalt_lock);
 
 /*
  * Set the given Cobalt APIC Redirection Table entry to point
Index: linux-2.6.23/arch/i386/mach-voyager/voyager_basic.c
===================================================================
--- linux-2.6.23.orig/arch/i386/mach-voyager/voyager_basic.c
+++ linux-2.6.23/arch/i386/mach-voyager/voyager_basic.c
@@ -185,20 +185,20 @@ voyager_timer_interrupt(void)
 		 * pointy.  */
 		__u16 val;
 
-		spin_lock(&i8253_lock);
+		spin_lock_irqsave(&i8253_lock);
 		
 		outb_p(0x00, 0x43);
 		val = inb_p(0x40);
 		val |= inb(0x40) << 8;
-		spin_unlock(&i8253_lock);
+		spin_unlock_irqrestore(&i8253_lock);
 
 		if(val > LATCH) {
 			printk("\nVOYAGER: countdown timer value too high (%d), resetting\n\n", val);
-			spin_lock(&i8253_lock);
+			spin_lock_irqsave(&i8253_lock);
 			outb(0x34,0x43);
 			outb_p(LATCH & 0xff , 0x40);	/* LSB */
 			outb(LATCH >> 8 , 0x40);	/* MSB */
-			spin_unlock(&i8253_lock);
+			spin_unlock_irqrestore(&i8253_lock);
 		}
 	}
 #ifdef CONFIG_SMP
Index: linux-2.6.23/arch/i386/mm/fault.c
===================================================================
--- linux-2.6.23.orig/arch/i386/mm/fault.c
+++ linux-2.6.23/arch/i386/mm/fault.c
@@ -311,6 +311,8 @@ fastcall void __kprobes do_page_fault(st
 	/* get the address */
         address = read_cr2();
 
+	local_irq_enable_hw_cond();
+
 	tsk = current;
 
 	si_code = SEGV_MAPERR;
@@ -655,3 +657,22 @@ void vmalloc_sync_all(void)
 			start = address + PGDIR_SIZE;
 	}
 }
+
+#ifdef CONFIG_IPIPE
+void __ipipe_pin_range_globally(unsigned long start, unsigned long end)
+{
+	unsigned long next, addr = start;
+
+	do {
+		unsigned long flags;
+		struct page *page;
+
+		next = pgd_addr_end(addr, end);
+		spin_lock_irqsave(&pgd_lock, flags);
+		for (page = pgd_list; page; page = (struct page *)page->index)
+			vmalloc_sync_one(page_address(page), addr);
+		spin_unlock_irqrestore(&pgd_lock, flags);
+
+	} while (addr = next, addr != end);
+}
+#endif /* CONFIG_IPIPE */
Index: linux-2.6.23/drivers/pci/htirq.c
===================================================================
--- linux-2.6.23.orig/drivers/pci/htirq.c
+++ linux-2.6.23/drivers/pci/htirq.c
@@ -21,7 +21,7 @@
  * With multiple simultaneous hypertransport irq devices it might pay
  * to make this more fine grained.  But start with simple, stupid, and correct.
  */
-static DEFINE_SPINLOCK(ht_irq_lock);
+static IPIPE_DEFINE_SPINLOCK(ht_irq_lock);
 
 struct ht_irq_cfg {
 	struct pci_dev *dev;
Index: linux-2.6.23/include/asm-i386/apic.h
===================================================================
--- linux-2.6.23.orig/include/asm-i386/apic.h
+++ linux-2.6.23/include/asm-i386/apic.h
@@ -79,7 +79,13 @@ int get_physical_broadcast(void);
 # define apic_write_around(x,y) apic_write_atomic((x),(y))
 #endif
 
+#ifdef CONFIG_IPIPE
+#define ack_APIC_irq() do { } while(0)
+static inline void __ack_APIC_irq(void)
+#else /* !CONFIG_IPIPE */
+#define __ack_APIC_irq() ack_APIC_irq()
 static inline void ack_APIC_irq(void)
+#endif /* CONFIG_IPIPE */
 {
 	/*
 	 * ack_APIC_irq() actually gets compiled as a single instruction:
Index: linux-2.6.23/include/asm-i386/hw_irq.h
===================================================================
--- linux-2.6.23.orig/include/asm-i386/hw_irq.h
+++ linux-2.6.23/include/asm-i386/hw_irq.h
@@ -40,6 +40,13 @@ fastcall void error_interrupt(void);
 fastcall void spurious_interrupt(void);
 fastcall void thermal_interrupt(void);
 #define platform_legacy_irq(irq)	((irq) < 16)
+#ifdef CONFIG_IPIPE
+fastcall void ipipe_ipi0(void);
+fastcall void ipipe_ipi1(void);
+fastcall void ipipe_ipi2(void);
+fastcall void ipipe_ipi3(void);
+fastcall void ipipe_ipiX(void);
+#endif
 #endif
 
 void disable_8259A_irq(unsigned int irq);
Index: linux-2.6.23/include/asm-i386/i8253.h
===================================================================
--- linux-2.6.23.orig/include/asm-i386/i8253.h
+++ linux-2.6.23/include/asm-i386/i8253.h
@@ -8,7 +8,7 @@
 #define PIT_CH0			0x40
 #define PIT_CH2			0x42
 
-extern spinlock_t i8253_lock;
+extern ipipe_spinlock_t i8253_lock;
 
 extern struct clock_event_device *global_clock_event;
 
Index: linux-2.6.23/include/asm-i386/i8259.h
===================================================================
--- linux-2.6.23.orig/include/asm-i386/i8259.h
+++ linux-2.6.23/include/asm-i386/i8259.h
@@ -7,7 +7,7 @@ extern unsigned int cached_irq_mask;
 #define cached_master_mask	(__byte(0, cached_irq_mask))
 #define cached_slave_mask	(__byte(1, cached_irq_mask))
 
-extern spinlock_t i8259A_lock;
+extern ipipe_spinlock_t i8259A_lock;
 
 extern void init_8259A(int auto_eoi);
 extern void enable_8259A_irq(unsigned int irq);
Index: linux-2.6.23/include/asm-i386/ipipe.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/asm-i386/ipipe.h
@@ -0,0 +1,205 @@
+/*   -*- linux-c -*-
+ *   include/asm-i386/ipipe.h
+ *
+ *   Copyright (C) 2002-2005 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __I386_IPIPE_H
+#define __I386_IPIPE_H
+
+#ifdef CONFIG_IPIPE
+
+#define IPIPE_ARCH_STRING	"1.10-07"
+#define IPIPE_MAJOR_NUMBER	1
+#define IPIPE_MINOR_NUMBER	10
+#define IPIPE_PATCH_NUMBER	7
+
+#ifndef __ASSEMBLY__
+
+#include <linux/cpumask.h>
+#include <linux/list.h>
+#include <linux/threads.h>
+#include <linux/ipipe_percpu.h>
+#include <asm/ptrace.h>
+
+#define ipipe_processor_id()   raw_smp_processor_id()
+
+#define prepare_arch_switch(next)		\
+do {						\
+	ipipe_schedule_notify(current, next);	\
+	local_irq_disable_hw();			\
+} while(0)
+
+#define task_hijacked(p)						\
+	({ int x = !ipipe_root_domain_p; \
+	__clear_bit(IPIPE_SYNC_FLAG, &ipipe_root_cpudom_var(status));	\
+	local_irq_enable_hw(); x; })
+
+struct ipipe_domain;
+
+struct ipipe_sysinfo {
+
+	int ncpus;		/* Number of CPUs on board */
+	u64 cpufreq;		/* CPU frequency (in Hz) */
+
+	/* Arch-dependent block */
+
+	struct {
+		unsigned tmirq;	/* Timer tick IRQ */
+		u64 tmfreq;	/* Timer frequency */
+	} archdep;
+};
+
+#define ipipe_read_tsc(t)  __asm__ __volatile__("rdtsc" : "=A" (t))
+#define ipipe_cpu_freq() ({ unsigned long long __freq = cpu_has_tsc?(1000LL * cpu_khz):CLOCK_TICK_RATE; __freq; })
+
+#define ipipe_tsc2ns(t) \
+({ \
+	unsigned long long delta = (t)*1000; \
+	do_div(delta, cpu_khz/1000+1); \
+	(unsigned long)delta; \
+})
+
+#define ipipe_tsc2us(t) \
+({ \
+    unsigned long long delta = (t); \
+    do_div(delta, cpu_khz/1000+1); \
+    (unsigned long)delta; \
+})
+
+/* Private interface -- Internal use only */
+
+#define __ipipe_check_platform()	do { } while(0)
+#define __ipipe_init_platform()		do { } while(0)
+#define __ipipe_enable_irq(irq)		irq_desc[irq].chip->enable(irq)
+#define __ipipe_disable_irq(irq)	irq_desc[irq].chip->disable(irq)
+
+#ifdef CONFIG_SMP
+void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd);
+#else
+#define __ipipe_hook_critical_ipi(ipd) do { } while(0)
+#endif
+
+#define __ipipe_disable_irqdesc(ipd, irq)	do { } while(0)
+
+void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq);
+
+void __ipipe_enable_pipeline(void);
+
+int __ipipe_handle_irq(struct pt_regs regs);
+
+void __ipipe_do_critical_sync(unsigned irq, void *cookie);
+
+extern int __ipipe_tick_irq;
+
+#define __ipipe_call_root_xirq_handler(ipd,irq) \
+   __asm__ __volatile__ ("pushfl\n\t" \
+                         "pushl %%cs\n\t" \
+                         "pushl $1f\n\t" \
+	                 "pushl %%eax\n\t" \
+                         "pushl %%fs\n\t" \
+	                 "pushl %%es\n\t" \
+	                 "pushl %%ds\n\t" \
+	                 "pushl %%eax\n\t" \
+	                 "pushl %%ebp\n\t" \
+	                 "pushl %%edi\n\t" \
+	                 "pushl %%esi\n\t" \
+	                 "pushl %%edx\n\t" \
+	                 "pushl %%ecx\n\t" \
+	                 "pushl %%ebx\n\t" \
+                         "movl  %%esp,%%eax\n\t" \
+                         "call *%1\n\t" \
+	                 "jmp ret_from_intr\n\t" \
+	                 "1: cli\n" \
+			 : /* no output */ \
+			 : "a" (~irq), "m" ((ipd)->irqs[irq].handler))
+
+#define __ipipe_call_root_virq_handler(ipd,irq) \
+   __asm__ __volatile__ ("pushfl\n\t" \
+                         "pushl %%cs\n\t" \
+                         "pushl $__virq_end\n\t" \
+	                 "pushl $-1\n\t" \
+                         "pushl %%fs\n\t" \
+	                 "pushl %%es\n\t" \
+	                 "pushl %%ds\n\t" \
+	                 "pushl %%eax\n\t" \
+	                 "pushl %%ebp\n\t" \
+	                 "pushl %%edi\n\t" \
+	                 "pushl %%esi\n\t" \
+	                 "pushl %%edx\n\t" \
+	                 "pushl %%ecx\n\t" \
+	                 "pushl %%ebx\n\t" \
+			 "pushl %2\n\t" \
+                         "pushl %%eax\n\t" \
+                         "call *%1\n\t" \
+			 "addl $8,%%esp\n\t" \
+			 : /* no output */ \
+			 : "a" (irq), "m" ((ipd)->irqs[irq].handler), "r" ((ipd)->irqs[irq].cookie))
+
+#define __ipipe_finalize_root_virq_handler() \
+   __asm__ __volatile__ ("jmp ret_from_intr\n\t" \
+	                 "__virq_end: cli\n" \
+			 : /* no output */ \
+			 : /* no input */)
+
+static inline unsigned long __ipipe_ffnz(unsigned long ul)
+{
+      __asm__("bsrl %1, %0":"=r"(ul)
+      :	"r"(ul));
+	return ul;
+}
+
+/* When running handlers, enable hw interrupts for all domains but the
+ * one heading the pipeline, so that IRQs can never be significantly
+ * deferred for the latter. */
+#define __ipipe_run_isr(ipd, irq) \
+do { \
+	local_irq_enable_nohead(ipd);				 \
+	if (ipd == ipipe_root_domain) {				 \
+		if (likely(!ipipe_virtual_irq_p(irq))) {	 \
+			__ipipe_call_root_xirq_handler(ipd,irq); \
+		} else {					 \
+			irq_enter();				 \
+			__ipipe_call_root_virq_handler(ipd,irq); \
+			irq_exit();				 \
+			__ipipe_finalize_root_virq_handler();	 \
+		}						\
+	} else {						\
+		__clear_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \
+		ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie);	\
+		__set_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \
+	}							\
+	local_irq_disable_nohead(ipd);				\
+} while(0)
+
+#endif /* __ASSEMBLY__ */
+
+#define __ipipe_syscall_watched_p(p, sc)	\
+	(((p)->flags & PF_EVNOTIFY) || (unsigned long)sc >= NR_syscalls)
+
+int __ipipe_check_lapic(void);
+
+int __ipipe_check_tickdev(const char *devname);
+
+#else /* !CONFIG_IPIPE */
+
+#define task_hijacked(p)	0
+
+#endif /* CONFIG_IPIPE */
+
+#endif	/* !__I386_IPIPE_H */
Index: linux-2.6.23/include/asm-i386/ipipe_base.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/asm-i386/ipipe_base.h
@@ -0,0 +1,182 @@
+/* -*- linux-c -*-
+ * include/asm-i386/ipipe_base.h
+ *
+ * Copyright (C) 2007 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __I386_IPIPE_BASE_H
+#define __I386_IPIPE_BASE_H
+
+#include <linux/threads.h>
+#include <irq_vectors.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+/* System interrupts are mapped beyond the last defined external IRQ
+ * number. */
+#define IPIPE_FIRST_APIC_IRQ	NR_IRQS
+#define IPIPE_NR_XIRQS		(NR_IRQS + 256 - FIRST_SYSTEM_VECTOR)
+#define ipipe_apic_irq_vector(irq)  ((irq) - IPIPE_FIRST_APIC_IRQ + FIRST_SYSTEM_VECTOR)
+#define ipipe_apic_vector_irq(vec)  ((vec) - FIRST_SYSTEM_VECTOR + IPIPE_FIRST_APIC_IRQ)
+/* If the APIC is enabled, then we expose four service vectors in the
+ * APIC space which are freely available to domains. */
+#define IPIPE_SERVICE_VECTOR0	0xf5
+#define IPIPE_SERVICE_IPI0	ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0)
+#define IPIPE_SERVICE_VECTOR1	0xf6
+#define IPIPE_SERVICE_IPI1	ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1)
+#define IPIPE_SERVICE_VECTOR2	0xf7
+#define IPIPE_SERVICE_IPI2	ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2)
+#define IPIPE_SERVICE_VECTOR3	0xf8
+#define IPIPE_SERVICE_IPI3	ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3)
+#define IPIPE_CRITICAL_VECTOR  0xf9	/* SMP-only: used by ipipe_critical_enter/exit() */
+#define IPIPE_CRITICAL_IPI     ipipe_apic_vector_irq(IPIPE_CRITICAL_VECTOR)
+#else	/* !CONFIG_X86_LOCAL_APIC */
+#define IPIPE_NR_XIRQS		NR_IRQS
+#endif	/* !CONFIG_X86_LOCAL_APIC */
+
+#define IPIPE_IRQ_ISHIFT  	5	/* 2^5 for 32bits arch. */
+
+#define ex_do_divide_error		0
+#define ex_do_debug			1
+/* NMI not pipelined. */
+#define ex_do_int3			3
+#define ex_do_overflow			4
+#define ex_do_bounds			5
+#define ex_do_invalid_op		6
+#define ex_device_not_available		7
+/* Double fault not pipelined. */
+#define ex_do_coprocessor_segment_overrun 9
+#define ex_do_invalid_TSS		10
+#define ex_do_segment_not_present	11
+#define ex_do_stack_segment		12
+#define ex_do_general_protection	13
+#define ex_do_page_fault		14
+#define ex_do_spurious_interrupt_bug	15
+#define ex_do_coprocessor_error		16
+#define ex_do_alignment_check		17
+#define ex_machine_check_vector		18
+#define ex_do_simd_coprocessor_error	19
+#define ex_do_iret_error		32
+
+/* IDT fault vectors */
+#define IPIPE_NR_FAULTS		33 /* 32 from IDT + iret_error */
+/* Pseudo-vectors used for kernel events */
+#define IPIPE_FIRST_EVENT	IPIPE_NR_FAULTS
+#define IPIPE_EVENT_SYSCALL	(IPIPE_FIRST_EVENT)
+#define IPIPE_EVENT_SCHEDULE	(IPIPE_FIRST_EVENT + 1)
+#define IPIPE_EVENT_SIGWAKE	(IPIPE_FIRST_EVENT + 2)
+#define IPIPE_EVENT_SETSCHED	(IPIPE_FIRST_EVENT + 3)
+#define IPIPE_EVENT_INIT	(IPIPE_FIRST_EVENT + 4)
+#define IPIPE_EVENT_EXIT	(IPIPE_FIRST_EVENT + 5)
+#define IPIPE_EVENT_CLEANUP	(IPIPE_FIRST_EVENT + 6)
+#define IPIPE_LAST_EVENT	IPIPE_EVENT_CLEANUP
+#define IPIPE_NR_EVENTS		(IPIPE_LAST_EVENT + 1)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/alternative.h>
+
+#ifdef CONFIG_SMP
+
+#define GET_ROOT_STATUS_ADDR					\
+	"pushfl; cli;"						\
+	"movl %%fs:per_cpu__this_cpu_off, %%eax;"		\
+	"lea per_cpu__ipipe_percpu_darray(%%eax), %%eax;"
+#define PUT_ROOT_STATUS_ADDR	"popfl;"
+
+static inline void __ipipe_stall_root(void)
+{
+	__asm__ __volatile__(GET_ROOT_STATUS_ADDR
+			     LOCK_PREFIX
+			     "btsl $0,(%%eax);"
+			     PUT_ROOT_STATUS_ADDR
+			     : : : "eax", "memory");
+}
+
+static inline unsigned long __ipipe_test_and_stall_root(void)
+{
+	int oldbit;
+
+	__asm__ __volatile__(GET_ROOT_STATUS_ADDR
+			     LOCK_PREFIX
+			     "btsl $0,(%%eax);"
+			     "sbbl %0,%0;"
+			     PUT_ROOT_STATUS_ADDR
+			     :"=r" (oldbit)
+			     : : "eax", "memory");
+	return oldbit;
+}
+
+static inline unsigned long __ipipe_test_root(void)
+{
+	int oldbit;
+
+	__asm__ __volatile__(GET_ROOT_STATUS_ADDR
+			     "btl $0,(%%eax);"
+			     "sbbl %0,%0;"
+			     PUT_ROOT_STATUS_ADDR
+			     :"=r" (oldbit)
+			     : : "eax");
+	return oldbit;
+}
+
+#else /* ! CONFIG_SMP */
+
+#if __GNUC__ >= 4
+/* Alias to ipipe_root_cpudom_var(status) */
+extern unsigned long __ipipe_root_status;
+#else
+extern unsigned long *const __ipipe_root_status_addr;
+#define __ipipe_root_status	(*__ipipe_root_status_addr)
+#endif
+
+static inline void __ipipe_stall_root(void)
+{
+	volatile unsigned long *p = &__ipipe_root_status;
+	__asm__ __volatile__("btsl $0,%0;"
+			     :"+m" (*p) : : "memory");
+}
+
+static inline unsigned long __ipipe_test_and_stall_root(void)
+{
+	volatile unsigned long *p = &__ipipe_root_status;
+	int oldbit;
+
+	__asm__ __volatile__("btsl $0,%1;"
+			     "sbbl %0,%0;"
+			     :"=r" (oldbit), "+m" (*p)
+			     : : "memory");
+	return oldbit;
+}
+
+static inline unsigned long __ipipe_test_root(void)
+{
+	volatile unsigned long *p = &__ipipe_root_status;
+	int oldbit;
+
+	__asm__ __volatile__("btl $0,%1;"
+			     "sbbl %0,%0;"
+			     :"=r" (oldbit)
+			     :"m" (*p));
+	return oldbit;
+}
+
+#endif	/* CONFIG_SMP */
+
+#endif /* !__ASSEMBLY__ */
+
+#endif	/* !__I386_IPIPE_BASE_H */
Index: linux-2.6.23/include/asm-i386/irqflags.h
===================================================================
--- linux-2.6.23.orig/include/asm-i386/irqflags.h
+++ linux-2.6.23/include/asm-i386/irqflags.h
@@ -12,32 +12,62 @@
 #include <asm/processor-flags.h>
 
 #ifndef __ASSEMBLY__
+
+#include <linux/ipipe_trace.h>
+
 static inline unsigned long native_save_fl(void)
 {
 	unsigned long f;
+#ifdef CONFIG_IPIPE
+	f = (!__ipipe_test_root()) << 9;
+	barrier();
+#else
 	asm volatile("pushfl ; popl %0":"=g" (f): /* no input */);
+#endif
 	return f;
 }
 
 static inline void native_restore_fl(unsigned long f)
 {
+#ifdef CONFIG_IPIPE
+	barrier();
+	__ipipe_restore_root(!(f & X86_EFLAGS_IF));
+#else
 	asm volatile("pushl %0 ; popfl": /* no output */
 			     :"g" (f)
 			     :"memory", "cc");
+#endif
 }
 
 static inline void native_irq_disable(void)
 {
+#ifdef CONFIG_IPIPE
+	ipipe_check_context(ipipe_root_domain);
+	__ipipe_stall_root();
+	barrier();
+#else
 	asm volatile("cli": : :"memory");
+#endif
 }
 
 static inline void native_irq_enable(void)
 {
+#ifdef CONFIG_IPIPE
+	barrier();
+	__ipipe_unstall_root();
+#else
 	asm volatile("sti": : :"memory");
+#endif
 }
 
 static inline void native_safe_halt(void)
 {
+#ifdef CONFIG_IPIPE
+	__ipipe_unstall_root();
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+	ipipe_trace_end(0x8000000E);
+#endif
+#endif
 	asm volatile("sti; hlt": : :"memory");
 }
 
@@ -95,16 +125,31 @@ static inline void halt(void)
  */
 static inline unsigned long __raw_local_irq_save(void)
 {
+#ifdef CONFIG_IPIPE
+	unsigned long flags = (!__ipipe_test_and_stall_root()) << 9;
+	barrier();
+#else
 	unsigned long flags = __raw_local_save_flags();
-
 	raw_local_irq_disable();
-
+#endif
 	return flags;
 }
 
 #else
+
+#ifdef CONFIG_IPIPE
+#define DISABLE_INTERRUPTS(clobbers)	PER_CPU(ipipe_percpu_darray, %eax); btsl $0,(%eax); sti
+#define ENABLE_INTERRUPTS(clobbers)	call __ipipe_unstall_root
+#define ENABLE_INTERRUPTS_HW_COND	sti
+#define DISABLE_INTERRUPTS_HW(clobbers)	cli
+#define ENABLE_INTERRUPTS_HW(clobbers)	sti
+#else /* !CONFIG_IPIPE */
 #define DISABLE_INTERRUPTS(clobbers)	cli
 #define ENABLE_INTERRUPTS(clobbers)	sti
+#define ENABLE_INTERRUPTS_HW_COND
+#define DISABLE_INTERRUPTS_HW(clobbers)	DISABLE_INTERRUPTS(clobbers)
+#define ENABLE_INTERRUPTS_HW(clobbers)	ENABLE_INTERRUPTS(clobbers)
+#endif /* !CONFIG_IPIPE */
 #define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
 #define INTERRUPT_RETURN		iret
 #define GET_CR0_INTO_EAX		movl %cr0, %eax
@@ -115,8 +160,10 @@ static inline unsigned long __raw_local_
 #define raw_local_save_flags(flags) \
 		do { (flags) = __raw_local_save_flags(); } while (0)
 
-#define raw_local_irq_save(flags) \
-		do { (flags) = __raw_local_irq_save(); } while (0)
+#define raw_local_irq_save(flags) do {			\
+		ipipe_check_context(ipipe_root_domain);	\
+		(flags) = __raw_local_irq_save();	\
+	} while (0)
 
 static inline int raw_irqs_disabled_flags(unsigned long flags)
 {
@@ -129,6 +176,70 @@ static inline int raw_irqs_disabled(void
 
 	return raw_irqs_disabled_flags(flags);
 }
+
+static inline unsigned long raw_mangle_irq_bits(int virt, unsigned long real)
+{
+	/* Merge virtual and real interrupt mask bits into a single
+	   32bit word. */
+	return (real & ~(1 << 31)) | ((virt != 0) << 31);
+}
+
+static inline int raw_demangle_irq_bits(unsigned long *x)
+{
+	int virt = (*x & (1 << 31)) != 0;
+	*x &= ~(1L << 31);
+	return virt;
+}
+
+#define local_irq_disable_hw_notrace() \
+	__asm__ __volatile__("cli": : :"memory")
+#define local_irq_enable_hw_notrace() \
+	__asm__ __volatile__("sti": : :"memory")
+#define local_irq_save_hw_notrace(x) \
+	__asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
+#define local_irq_restore_hw_notrace(x) \
+	__asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc")
+
+#define local_save_flags_hw(x)	__asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */)
+#define irqs_disabled_hw()		\
+    ({					\
+	unsigned long x;		\
+	local_save_flags_hw(x);		\
+	raw_irqs_disabled_flags(x);	\
+    })
+
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+#define local_irq_disable_hw() do {			\
+		if (!irqs_disabled_hw()) {		\
+			local_irq_disable_hw_notrace();	\
+			ipipe_trace_begin(0x80000000);	\
+		}					\
+	} while (0)
+#define local_irq_enable_hw() do {			\
+		if (irqs_disabled_hw()) {		\
+			ipipe_trace_end(0x80000000);	\
+			local_irq_enable_hw_notrace();	\
+		}					\
+	} while (0)
+#define local_irq_save_hw(x) do {			\
+		local_save_flags_hw(x);			\
+		if (!raw_irqs_disabled_flags(x)) {	\
+			local_irq_disable_hw_notrace();	\
+			ipipe_trace_begin(0x80000001);	\
+		}					\
+	} while (0)
+#define local_irq_restore_hw(x) do {			\
+		if (!raw_irqs_disabled_flags(x))	\
+			ipipe_trace_end(0x80000001);	\
+		local_irq_restore_hw_notrace(x);	\
+	} while (0)
+#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */
+#define local_irq_save_hw(x)	local_irq_save_hw_notrace(x)
+#define local_irq_restore_hw(x)	local_irq_restore_hw_notrace(x)
+#define local_irq_enable_hw()	local_irq_enable_hw_notrace()
+#define local_irq_disable_hw()	local_irq_disable_hw_notrace()
+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
+
 #endif /* __ASSEMBLY__ */
 
 /*
Index: linux-2.6.23/include/asm-i386/mmu_context.h
===================================================================
--- linux-2.6.23.orig/include/asm-i386/mmu_context.h
+++ linux-2.6.23/include/asm-i386/mmu_context.h
@@ -79,8 +79,11 @@ static inline void switch_mm(struct mm_s
 
 #define activate_mm(prev, next)				\
 	do {						\
+		unsigned long flags;			\
 		paravirt_activate_mm(prev, next);	\
+		local_irq_save_hw_cond(flags);		\
 		switch_mm((prev),(next),NULL);		\
+		local_irq_restore_hw_cond(flags);	\
 	} while(0);
 
 #endif
Index: linux-2.6.23/include/asm-i386/nmi.h
===================================================================
--- linux-2.6.23.orig/include/asm-i386/nmi.h
+++ linux-2.6.23/include/asm-i386/nmi.h
@@ -29,7 +29,8 @@ extern void setup_apic_nmi_watchdog (voi
 extern void stop_apic_nmi_watchdog (void *);
 extern void disable_timer_nmi_watchdog(void);
 extern void enable_timer_nmi_watchdog(void);
-extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
+extern int (*nmi_watchdog_tick)(struct pt_regs * regs, unsigned reason);
+extern void die_nmi(struct pt_regs *, const char *msg);
 
 extern atomic_t nmi_active;
 extern unsigned int nmi_watchdog;
Index: linux-2.6.23/include/asm-i386/spinlock.h
===================================================================
--- linux-2.6.23.orig/include/asm-i386/spinlock.h
+++ linux-2.6.23/include/asm-i386/spinlock.h
@@ -54,7 +54,7 @@ static inline void __raw_spin_lock(raw_s
  * NOTE: there's an irqs-on section here, which normally would have to be
  * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use this variant.
  */
-#ifndef CONFIG_PROVE_LOCKING
+#if !defined(CONFIG_PROVE_LOCKING) && !defined(CONFIG_IPIPE)
 static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
 {
 	asm volatile(
Index: linux-2.6.23/include/linux/clockchips.h
===================================================================
--- linux-2.6.23.orig/include/linux/clockchips.h
+++ linux-2.6.23/include/linux/clockchips.h
@@ -91,6 +91,7 @@ struct clock_event_device {
 	struct list_head	list;
 	enum clock_event_mode	mode;
 	ktime_t			next_event;
+	int64_t			delta;
 };
 
 /*
Index: linux-2.6.23/include/linux/hardirq.h
===================================================================
--- linux-2.6.23.orig/include/linux/hardirq.h
+++ linux-2.6.23/include/linux/hardirq.h
@@ -146,7 +146,7 @@ extern void irq_enter(void);
  */
 extern void irq_exit(void);
 
-#define nmi_enter()		do { lockdep_off(); __irq_enter(); } while (0)
-#define nmi_exit()		do { __irq_exit(); lockdep_on(); } while (0)
+#define nmi_enter()		do { if (ipipe_root_domain_p) { lockdep_off(); __irq_enter(); } } while (0)
+#define nmi_exit()		do { if (ipipe_root_domain_p) { __irq_exit(); lockdep_on(); } } while (0)
 
 #endif /* LINUX_HARDIRQ_H */
Index: linux-2.6.23/include/linux/ipipe.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/linux/ipipe.h
@@ -0,0 +1,552 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe.h
+ *
+ * Copyright (C) 2002-2007 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_H
+#define __LINUX_IPIPE_H
+
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+#include <linux/linkage.h>
+#include <linux/ipipe_base.h>
+#include <linux/ipipe_compat.h>
+#include <asm/ipipe.h>
+
+#ifdef CONFIG_IPIPE
+
+#define IPIPE_VERSION_STRING	IPIPE_ARCH_STRING
+#define IPIPE_RELEASE_NUMBER	((IPIPE_MAJOR_NUMBER << 16) | \
+				 (IPIPE_MINOR_NUMBER <<  8) | \
+				 (IPIPE_PATCH_NUMBER))
+
+#ifndef BROKEN_BUILTIN_RETURN_ADDRESS
+#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_address(0))
+#define __BUILTIN_RETURN_ADDRESS1 ((unsigned long)__builtin_return_address(1))
+#endif /* !BUILTIN_RETURN_ADDRESS */
+
+#define IPIPE_ROOT_PRIO		100
+#define IPIPE_ROOT_ID		0
+#define IPIPE_ROOT_NPTDKEYS	4	/* Must be <= BITS_PER_LONG */
+
+#define IPIPE_RESET_TIMER	0x1
+#define IPIPE_GRAB_TIMER	0x2
+
+/* Global domain flags */
+#define IPIPE_SPRINTK_FLAG	0	/* Synchronous printk() allowed */
+#define IPIPE_AHEAD_FLAG	1	/* Domain always heads the pipeline */
+
+/* Interrupt control bits */
+#define IPIPE_HANDLE_FLAG	0
+#define IPIPE_PASS_FLAG		1
+#define IPIPE_ENABLE_FLAG	2
+#define IPIPE_DYNAMIC_FLAG	IPIPE_HANDLE_FLAG
+#define IPIPE_STICKY_FLAG	3
+#define IPIPE_SYSTEM_FLAG	4
+#define IPIPE_LOCK_FLAG		5
+#define IPIPE_WIRED_FLAG	6
+#define IPIPE_EXCLUSIVE_FLAG	7
+
+#define IPIPE_HANDLE_MASK	(1 << IPIPE_HANDLE_FLAG)
+#define IPIPE_PASS_MASK		(1 << IPIPE_PASS_FLAG)
+#define IPIPE_ENABLE_MASK	(1 << IPIPE_ENABLE_FLAG)
+#define IPIPE_DYNAMIC_MASK	IPIPE_HANDLE_MASK
+#define IPIPE_STICKY_MASK	(1 << IPIPE_STICKY_FLAG)
+#define IPIPE_SYSTEM_MASK	(1 << IPIPE_SYSTEM_FLAG)
+#define IPIPE_LOCK_MASK		(1 << IPIPE_LOCK_FLAG)
+#define IPIPE_WIRED_MASK	(1 << IPIPE_WIRED_FLAG)
+#define IPIPE_EXCLUSIVE_MASK	(1 << IPIPE_EXCLUSIVE_FLAG)
+
+#define IPIPE_DEFAULT_MASK	(IPIPE_HANDLE_MASK|IPIPE_PASS_MASK)
+#define IPIPE_STDROOT_MASK	(IPIPE_HANDLE_MASK|IPIPE_PASS_MASK|IPIPE_SYSTEM_MASK)
+
+#define IPIPE_EVENT_SELF        0x80000000
+
+#define IPIPE_NR_CPUS		NR_CPUS
+
+#define ipipe_current_domain	ipipe_cpu_var(ipipe_percpu_domain)
+
+#define ipipe_virtual_irq_p(irq)	((irq) >= IPIPE_VIRQ_BASE && \
+					 (irq) < IPIPE_NR_IRQS)
+
+typedef void (*ipipe_irq_handler_t)(unsigned irq,
+				    void *cookie);
+
+#define IPIPE_SAME_HANDLER	((ipipe_irq_handler_t)(-1))
+
+typedef int (*ipipe_irq_ackfn_t)(unsigned irq);
+
+typedef int (*ipipe_event_handler_t)(unsigned event,
+				     struct ipipe_domain *from,
+				     void *data);
+struct ipipe_domain {
+
+	int slot;			/* Slot number in percpu domain data array. */
+	struct list_head p_link;	/* Link in pipeline */
+	ipipe_event_handler_t evhand[IPIPE_NR_EVENTS]; /* Event handlers. */
+	unsigned long long evself;	/* Self-monitored event bits. */
+
+	struct {
+		unsigned long control;
+		ipipe_irq_ackfn_t acknowledge;
+		ipipe_irq_handler_t handler;
+		void *cookie;
+	} ____cacheline_aligned irqs[IPIPE_NR_IRQS];
+
+	int priority;
+	void *pdd;
+	unsigned long flags;
+	unsigned domid;
+	const char *name;
+	struct mutex mutex;
+};
+
+#define IPIPE_HEAD_PRIORITY	(-1) /* For domains always heading the pipeline */
+
+struct ipipe_domain_attr {
+
+	unsigned domid;		/* Domain identifier -- Magic value set by caller */
+	const char *name;	/* Domain name -- Warning: won't be dup'ed! */
+	int priority;		/* Priority in interrupt pipeline */
+	void (*entry) (void);	/* Domain entry point */
+	void *pdd;		/* Per-domain (opaque) data pointer */
+};
+
+#ifdef CONFIG_SMP
+/* These ops must start and complete on the same CPU: care for
+ * migration. */
+#define set_bit_safe(b, a)						\
+		({ unsigned long __flags;				\
+		local_irq_save_hw_notrace(__flags);			\
+		__set_bit(b, a);					\
+		local_irq_restore_hw_notrace(__flags); })
+#define test_and_set_bit_safe(b, a)					\
+		({ unsigned long __flags, __x;				\
+		local_irq_save_hw_notrace(__flags);			\
+		__x = __test_and_set_bit(b, a);				\
+		local_irq_restore_hw_notrace(__flags); __x; })
+#define clear_bit_safe(b, a)						\
+		({ unsigned long __flags;				\
+		local_irq_save_hw_notrace(__flags);			\
+		__clear_bit(b, a);					\
+		local_irq_restore_hw_notrace(__flags); })
+#else
+#define set_bit_safe(b, a)		set_bit(b, a)
+#define test_and_set_bit_safe(b, a)	test_and_set_bit(b, a)
+#define clear_bit_safe(b, a)		clear_bit(b, a)
+#endif
+
+#define __ipipe_irq_cookie(ipd, irq)		(ipd)->irqs[irq].cookie
+#define __ipipe_irq_handler(ipd, irq)		(ipd)->irqs[irq].handler
+#define __ipipe_cpudata_irq_hits(ipd, cpu, irq)	ipipe_percpudom(ipd, irqall, cpu)[irq]
+
+extern unsigned __ipipe_printk_virq;
+
+extern unsigned long __ipipe_virtual_irq_map;
+
+extern struct list_head __ipipe_pipeline;
+
+extern int __ipipe_event_monitors[];
+
+/* Private interface */
+
+void ipipe_init(void);
+
+#ifdef CONFIG_PROC_FS
+void ipipe_init_proc(void);
+
+#ifdef CONFIG_IPIPE_TRACE
+void __ipipe_init_tracer(void);
+#else /* !CONFIG_IPIPE_TRACE */
+#define __ipipe_init_tracer()       do { } while(0)
+#endif /* CONFIG_IPIPE_TRACE */
+
+#else	/* !CONFIG_PROC_FS */
+#define ipipe_init_proc()	do { } while(0)
+#endif	/* CONFIG_PROC_FS */
+
+void __ipipe_init_stage(struct ipipe_domain *ipd);
+
+void __ipipe_cleanup_domain(struct ipipe_domain *ipd);
+
+void __ipipe_add_domain_proc(struct ipipe_domain *ipd);
+
+void __ipipe_remove_domain_proc(struct ipipe_domain *ipd);
+
+void __ipipe_flush_printk(unsigned irq, void *cookie);
+
+void fastcall __ipipe_walk_pipeline(struct list_head *pos);
+
+int fastcall __ipipe_schedule_irq(unsigned irq, struct list_head *head);
+
+int fastcall __ipipe_dispatch_event(unsigned event, void *data);
+
+int fastcall __ipipe_dispatch_wired(struct ipipe_domain *head_domain, unsigned irq);
+
+void fastcall __ipipe_sync_stage(unsigned long syncmask);
+
+void fastcall __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq);
+
+void fastcall __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq);
+
+void fastcall __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq);
+
+void __ipipe_pin_range_globally(unsigned long start, unsigned long end);
+
+/* Must be called hw IRQs off. */
+static inline void ipipe_irq_lock(unsigned irq)
+{
+	__ipipe_lock_irq(ipipe_current_domain, ipipe_processor_id(), irq);
+}
+
+/* Must be called hw IRQs off. */
+static inline void ipipe_irq_unlock(unsigned irq)
+{
+	__ipipe_unlock_irq(ipipe_current_domain, irq);
+}
+
+#ifndef __ipipe_sync_pipeline
+#define __ipipe_sync_pipeline(syncmask) __ipipe_sync_stage(syncmask)
+#endif
+
+#ifndef __ipipe_run_irqtail
+#define __ipipe_run_irqtail() do { } while(0)
+#endif
+
+#define __ipipe_pipeline_head_p(ipd) (&(ipd)->p_link == __ipipe_pipeline.next)
+
+/*
+ * Keep the following as a macro, so that client code could check for
+ * the support of the invariant pipeline head optimization.
+ */
+#define __ipipe_pipeline_head() list_entry(__ipipe_pipeline.next,struct ipipe_domain,p_link)
+
+#define __ipipe_event_monitored_p(ev) \
+	(__ipipe_event_monitors[ev] > 0 || (ipipe_current_domain->evself & (1LL << ev)))
+
+#ifdef CONFIG_SMP
+
+cpumask_t __ipipe_set_irq_affinity(unsigned irq,
+				   cpumask_t cpumask);
+
+int fastcall __ipipe_send_ipi(unsigned ipi,
+			      cpumask_t cpumask);
+
+#endif /* CONFIG_SMP */
+
+#define ipipe_sigwake_notify(p)	\
+do {					\
+	if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_SIGWAKE)) \
+		__ipipe_dispatch_event(IPIPE_EVENT_SIGWAKE,p);		\
+} while(0)
+
+#define ipipe_exit_notify(p)	\
+do {				\
+	if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_EXIT)) \
+		__ipipe_dispatch_event(IPIPE_EVENT_EXIT,p);		\
+} while(0)
+
+#define ipipe_setsched_notify(p)	\
+do {					\
+	if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_SETSCHED)) \
+		__ipipe_dispatch_event(IPIPE_EVENT_SETSCHED,p);		\
+} while(0)
+
+#define ipipe_schedule_notify(prev, next)				\
+do {									\
+	if ((((prev)->flags|(next)->flags) & PF_EVNOTIFY) &&		\
+	    __ipipe_event_monitored_p(IPIPE_EVENT_SCHEDULE))		\
+		__ipipe_dispatch_event(IPIPE_EVENT_SCHEDULE,next);	\
+} while(0)
+
+#define ipipe_trap_notify(ex, regs)		\
+({						\
+	int ret = 0;				\
+	if ((test_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpudom_var(status)) || \
+	     ((current)->flags & PF_EVNOTIFY)) &&			\
+	    __ipipe_event_monitored_p(ex))				\
+		ret = __ipipe_dispatch_event(ex, regs);			\
+	ret;								\
+})
+
+static inline void ipipe_init_notify(struct task_struct *p)
+{
+	if (__ipipe_event_monitored_p(IPIPE_EVENT_INIT))
+		__ipipe_dispatch_event(IPIPE_EVENT_INIT,p);
+}
+
+static inline void ipipe_cleanup_notify(struct mm_struct *mm)
+{
+	if (__ipipe_event_monitored_p(IPIPE_EVENT_CLEANUP))
+		__ipipe_dispatch_event(IPIPE_EVENT_CLEANUP,mm);
+}
+
+/* Public interface */
+
+int ipipe_register_domain(struct ipipe_domain *ipd,
+			  struct ipipe_domain_attr *attr);
+
+int ipipe_unregister_domain(struct ipipe_domain *ipd);
+
+void ipipe_suspend_domain(void);
+
+int ipipe_virtualize_irq(struct ipipe_domain *ipd,
+			 unsigned irq,
+			 ipipe_irq_handler_t handler,
+			 void *cookie,
+			 ipipe_irq_ackfn_t acknowledge,
+			 unsigned modemask);
+
+int ipipe_control_irq(unsigned irq,
+		      unsigned clrmask,
+		      unsigned setmask);
+
+unsigned ipipe_alloc_virq(void);
+
+int ipipe_free_virq(unsigned virq);
+
+int fastcall ipipe_trigger_irq(unsigned irq);
+
+static inline int ipipe_propagate_irq(unsigned irq)
+{
+	return __ipipe_schedule_irq(irq, ipipe_current_domain->p_link.next);
+}
+
+static inline int ipipe_schedule_irq(unsigned irq)
+{
+	return __ipipe_schedule_irq(irq, &ipipe_current_domain->p_link);
+}
+
+void fastcall ipipe_stall_pipeline_from(struct ipipe_domain *ipd);
+
+unsigned long fastcall ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd);
+
+void fastcall ipipe_unstall_pipeline_from(struct ipipe_domain *ipd);
+
+unsigned long fastcall ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd);
+
+void fastcall ipipe_restore_pipeline_from(struct ipipe_domain *ipd,
+					  unsigned long x);
+
+static inline unsigned long ipipe_test_pipeline_from(struct ipipe_domain *ipd)
+{
+	return test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
+}
+
+static inline void ipipe_stall_pipeline_head(void)
+{
+	local_irq_disable_hw();
+	__set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(__ipipe_pipeline_head(), status));
+}
+
+static inline unsigned long ipipe_test_and_stall_pipeline_head(void)
+{
+	local_irq_disable_hw();
+	return __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(__ipipe_pipeline_head(), status));
+}
+
+void ipipe_unstall_pipeline_head(void);
+
+void fastcall __ipipe_restore_pipeline_head(struct ipipe_domain *head_domain,
+					    unsigned long x);
+
+static inline void ipipe_restore_pipeline_head(unsigned long x)
+{
+	struct ipipe_domain *head_domain = __ipipe_pipeline_head();
+	/* On some archs, __test_and_set_bit() might return different
+	 * truth value than test_bit(), so we test the exclusive OR of
+	 * both statuses, assuming that the lowest bit is always set in
+	 * the truth value (if this is wrong, the failed optimization will
+	 * be caught in __ipipe_restore_pipeline_head() if
+	 * CONFIG_DEBUG_KERNEL is set). */
+	if ((x ^ test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status))) & 1)
+		__ipipe_restore_pipeline_head(head_domain, x);
+}
+
+#define ipipe_unstall_pipeline() \
+	ipipe_unstall_pipeline_from(ipipe_current_domain)
+
+#define ipipe_test_and_unstall_pipeline() \
+	ipipe_test_and_unstall_pipeline_from(ipipe_current_domain)
+
+#define ipipe_test_pipeline() \
+	ipipe_test_pipeline_from(ipipe_current_domain)
+
+#define ipipe_test_and_stall_pipeline() \
+	ipipe_test_and_stall_pipeline_from(ipipe_current_domain)
+
+#define ipipe_stall_pipeline() \
+	ipipe_stall_pipeline_from(ipipe_current_domain)
+
+#define ipipe_restore_pipeline(x) \
+	ipipe_restore_pipeline_from(ipipe_current_domain, (x))
+
+void ipipe_init_attr(struct ipipe_domain_attr *attr);
+
+int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo);
+
+unsigned long ipipe_critical_enter(void (*syncfn) (void));
+
+void ipipe_critical_exit(unsigned long flags);
+
+static inline void ipipe_set_printk_sync(struct ipipe_domain *ipd)
+{
+	set_bit(IPIPE_SPRINTK_FLAG, &ipd->flags);
+}
+
+static inline void ipipe_set_printk_async(struct ipipe_domain *ipd)
+{
+	clear_bit(IPIPE_SPRINTK_FLAG, &ipd->flags);
+}
+
+static inline void ipipe_set_foreign_stack(struct ipipe_domain *ipd)
+{
+	/* Must be called hw interrupts off. */
+	__set_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status));
+}
+
+static inline void ipipe_clear_foreign_stack(struct ipipe_domain *ipd)
+{
+	/* Must be called hw interrupts off. */
+	__clear_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status));
+}
+
+#define ipipe_safe_current()					\
+({								\
+	struct task_struct *p;					\
+	p = test_bit(IPIPE_NOSTACK_FLAG,			\
+		     &ipipe_this_cpudom_var(status)) ? &init_task : current; \
+	p; \
+})
+
+ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd,
+					unsigned event,
+					ipipe_event_handler_t handler);
+
+cpumask_t ipipe_set_irq_affinity(unsigned irq,
+				 cpumask_t cpumask);
+
+int fastcall ipipe_send_ipi(unsigned ipi,
+			    cpumask_t cpumask);
+
+int ipipe_setscheduler_root(struct task_struct *p,
+			    int policy,
+			    int prio);
+
+int ipipe_reenter_root(struct task_struct *prev,
+		       int policy,
+		       int prio);
+
+int ipipe_alloc_ptdkey(void);
+
+int ipipe_free_ptdkey(int key);
+
+int fastcall ipipe_set_ptd(int key,
+			   void *value);
+
+void fastcall *ipipe_get_ptd(int key);
+
+int ipipe_disable_ondemand_mappings(struct task_struct *tsk);
+
+#define local_irq_enable_hw_cond()		local_irq_enable_hw()
+#define local_irq_disable_hw_cond()		local_irq_disable_hw()
+#define local_irq_save_hw_cond(flags)		local_irq_save_hw(flags)
+#define local_irq_restore_hw_cond(flags)	local_irq_restore_hw(flags)
+#define local_irq_disable_head()		ipipe_stall_pipeline_head()
+
+#define local_irq_enable_nohead(ipd)			\
+	do {						\
+		if (!__ipipe_pipeline_head_p(ipd))	\
+			local_irq_enable_hw();		\
+	} while(0)
+
+#define local_irq_disable_nohead(ipd)		\
+	do {						\
+		if (!__ipipe_pipeline_head_p(ipd))	\
+			local_irq_disable_hw();		\
+	} while(0)
+
+#define ipipe_root_domain_p		(ipipe_current_domain == ipipe_root_domain)
+
+#else	/* !CONFIG_IPIPE */
+
+#define ipipe_init()			do { } while(0)
+#define ipipe_suspend_domain()		do { } while(0)
+#define ipipe_sigwake_notify(p)		do { } while(0)
+#define ipipe_setsched_notify(p)	do { } while(0)
+#define ipipe_init_notify(p)		do { } while(0)
+#define ipipe_exit_notify(p)		do { } while(0)
+#define ipipe_cleanup_notify(mm)	do { } while(0)
+#define ipipe_trap_notify(t,r)		0
+#define ipipe_init_proc()		do { } while(0)
+#define __ipipe_pin_range_globally(start, end)	do { } while(0)
+
+#define local_irq_enable_hw_cond()		do { } while(0)
+#define local_irq_disable_hw_cond()		do { } while(0)
+#define local_irq_save_hw_cond(flags)		do { (void)(flags); } while(0)
+#define local_irq_restore_hw_cond(flags)	do { } while(0)
+
+#define ipipe_irq_lock(irq)		do { } while(0)
+#define ipipe_irq_unlock(irq)		do { } while(0)
+
+#define ipipe_root_domain_p		1
+#define ipipe_safe_current		current
+
+#define local_irq_disable_head()	local_irq_disable()
+
+#endif	/* CONFIG_IPIPE */
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+
+#include <linux/cpumask.h>
+#include <asm/system.h>
+
+static inline int ipipe_disable_context_check(int cpu)
+{
+	return xchg(&per_cpu(ipipe_percpu_context_check, cpu), 0);
+}
+
+static inline void ipipe_restore_context_check(int cpu, int old_state)
+{
+	per_cpu(ipipe_percpu_context_check, cpu) = old_state;
+}
+
+static inline void ipipe_context_check_off(void)
+{
+	int cpu;
+	for_each_online_cpu(cpu)
+		per_cpu(ipipe_percpu_context_check, cpu) = 0;
+}
+
+#else	/* !CONFIG_IPIPE_DEBUG_CONTEXT */
+
+static inline int ipipe_disable_context_check(int cpu)
+{
+	return 0;
+}
+
+static inline void ipipe_restore_context_check(int cpu, int old_state) { }
+
+static inline void ipipe_context_check_off(void) { }
+
+#endif	/* !CONFIG_IPIPE_DEBUG_CONTEXT */
+
+#endif	/* !__LINUX_IPIPE_H */
Index: linux-2.6.23/include/linux/ipipe_base.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/linux/ipipe_base.h
@@ -0,0 +1,77 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_base.h
+ *
+ * Copyright (C) 2002-2007 Philippe Gerum.
+ *               2007 Jan Kiszka.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_BASE_H
+#define __LINUX_IPIPE_BASE_H
+
+#ifdef CONFIG_IPIPE
+
+#include <linux/bitops.h>
+#include <asm/ipipe_base.h>
+
+/* Number of virtual IRQs */
+#define IPIPE_NR_VIRQS		BITS_PER_LONG
+/* First virtual IRQ # */
+#define IPIPE_VIRQ_BASE		(((IPIPE_NR_XIRQS + BITS_PER_LONG - 1) / BITS_PER_LONG) * BITS_PER_LONG)
+/* Total number of IRQ slots */
+#define IPIPE_NR_IRQS		(IPIPE_VIRQ_BASE + IPIPE_NR_VIRQS)
+/* Number of indirect words needed to map the whole IRQ space. */
+#define IPIPE_IRQ_IWORDS	((IPIPE_NR_IRQS + BITS_PER_LONG - 1) / BITS_PER_LONG)
+#define IPIPE_IRQ_IMASK		(BITS_PER_LONG - 1)
+#define IPIPE_IRQMASK_ANY	(~0L)
+#define IPIPE_IRQMASK_VIRT	(IPIPE_IRQMASK_ANY << (IPIPE_VIRQ_BASE / BITS_PER_LONG))
+
+/* Per-cpu pipeline status */
+#define IPIPE_STALL_FLAG	0	/* Stalls a pipeline stage -- guaranteed at bit #0 */
+#define IPIPE_SYNC_FLAG		1	/* The interrupt syncer is running for the domain */
+#define IPIPE_NOSTACK_FLAG	2	/* Domain currently runs on a foreign stack */
+
+#define IPIPE_STALL_MASK	(1L << IPIPE_STALL_FLAG)
+#define IPIPE_SYNC_MASK		(1L << IPIPE_SYNC_FLAG)
+
+extern struct ipipe_domain ipipe_root;
+
+#define ipipe_root_domain (&ipipe_root)
+
+void __ipipe_unstall_root(void);
+
+void __ipipe_restore_root(unsigned long x);
+
+#define ipipe_preempt_disable(flags)	local_irq_save_hw(flags)
+#define ipipe_preempt_enable(flags)	local_irq_restore_hw(flags)
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+void ipipe_check_context(struct ipipe_domain *border_ipd);
+#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+static inline void ipipe_check_context(struct ipipe_domain *border_ipd) { }
+#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+
+#else /* !CONFIG_IPIPE */
+#define ipipe_preempt_disable(flags)	do { \
+						preempt_disable(); \
+						(void)(flags); \
+					while (0)
+#define ipipe_preempt_enable(flags)	preempt_enable()
+#define ipipe_check_context(ipd)	do { } while(0)
+#endif	/* CONFIG_IPIPE */
+
+#endif	/* !__LINUX_IPIPE_BASE_H */
Index: linux-2.6.23/include/linux/ipipe_compat.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/linux/ipipe_compat.h
@@ -0,0 +1,54 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_compat.h
+ *
+ * Copyright (C) 2007 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_COMPAT_H
+#define __LINUX_IPIPE_COMPAT_H
+
+#ifdef CONFIG_IPIPE_COMPAT
+/*
+ * OBSOLETE: defined only for backward compatibility. Will be removed
+ * in future releases, please update client code accordingly.
+ */
+
+#ifdef CONFIG_SMP
+#define ipipe_declare_cpuid	int cpuid
+#define ipipe_load_cpuid()	do { \
+					cpuid = ipipe_processor_id();	\
+				} while(0)
+#define ipipe_lock_cpu(flags)	do { \
+					local_irq_save_hw(flags); \
+					cpuid = ipipe_processor_id(); \
+				} while(0)
+#define ipipe_unlock_cpu(flags)	local_irq_restore_hw(flags)
+#define ipipe_get_cpu(flags)	ipipe_lock_cpu(flags)
+#define ipipe_put_cpu(flags)	ipipe_unlock_cpu(flags)
+#else /* !CONFIG_SMP */
+#define ipipe_declare_cpuid	const int cpuid = 0
+#define ipipe_load_cpuid()	do { } while(0)
+#define ipipe_lock_cpu(flags)	local_irq_save_hw(flags)
+#define ipipe_unlock_cpu(flags)	local_irq_restore_hw(flags)
+#define ipipe_get_cpu(flags)	do { (void)(flags); } while(0)
+#define ipipe_put_cpu(flags)	do { } while(0)
+#endif /* CONFIG_SMP */
+
+#endif /* CONFIG_IPIPE_COMPAT */
+
+#endif	/* !__LINUX_IPIPE_COMPAT_H */
Index: linux-2.6.23/include/linux/ipipe_percpu.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/linux/ipipe_percpu.h
@@ -0,0 +1,69 @@
+/*   -*- linux-c -*-
+ *   include/linux/ipipe_percpu.h
+ *
+ *   Copyright (C) 2007 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_PERCPU_H
+#define __LINUX_IPIPE_PERCPU_H
+
+#include <asm/percpu.h>
+#include <asm/ptrace.h>
+
+struct ipipe_domain;
+
+struct ipipe_percpu_domain_data {
+	unsigned long status;	/* <= Must be first in struct. */
+	unsigned long irqpend_himask;
+	unsigned long irqpend_lomask[IPIPE_IRQ_IWORDS];
+	unsigned long irqheld_mask[IPIPE_IRQ_IWORDS];
+	unsigned long irqall[IPIPE_NR_IRQS];
+	u64 evsync;
+};
+
+#ifdef CONFIG_SMP
+#define ipipe_percpudom(ipd, var, cpu)	\
+	(per_cpu(ipipe_percpu_darray, cpu)[(ipd)->slot].var)
+#define ipipe_cpudom_var(ipd, var)	\
+	(__raw_get_cpu_var(ipipe_percpu_darray)[(ipd)->slot].var)
+#else
+DECLARE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]);
+#define ipipe_percpudom(ipd, var, cpu)	\
+	(per_cpu(ipipe_percpu_daddr, cpu)[(ipd)->slot]->var)
+#define ipipe_cpudom_var(ipd, var)	\
+	(__raw_get_cpu_var(ipipe_percpu_daddr)[(ipd)->slot]->var)
+#endif
+
+DECLARE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]);
+
+DECLARE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain);
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+DECLARE_PER_CPU(int, ipipe_percpu_context_check);
+#endif
+
+#define ipipe_percpu(var, cpu)		per_cpu(var, cpu)
+#define ipipe_cpu_var(var)		__raw_get_cpu_var(var)
+
+#define ipipe_root_cpudom_var(var)	\
+	__raw_get_cpu_var(ipipe_percpu_darray)[0].var
+
+#define ipipe_this_cpudom_var(var)	\
+	ipipe_cpudom_var(ipipe_current_domain, var)
+
+#endif	/* !__LINUX_IPIPE_PERCPU_H */
Index: linux-2.6.23/include/linux/ipipe_tickdev.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/linux/ipipe_tickdev.h
@@ -0,0 +1,55 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_tickdev.h
+ *
+ * Copyright (C) 2007 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_TICKDEV_H
+#define __LINUX_IPIPE_TICKDEV_H
+
+#if defined(CONFIG_IPIPE) && defined(CONFIG_GENERIC_CLOCKEVENTS)
+
+#include <linux/clockchips.h>
+
+struct tick_device;
+
+struct ipipe_tick_device {
+
+	void (*emul_set_mode)(enum clock_event_mode,
+			      struct ipipe_tick_device *tdev);
+	int (*emul_set_tick)(unsigned long delta,
+			     struct ipipe_tick_device *tdev);
+	void (*real_set_mode)(enum clock_event_mode mode,
+			      struct clock_event_device *cdev);
+	int (*real_set_tick)(unsigned long delta,
+			     struct clock_event_device *cdev);
+	struct tick_device *slave;
+};
+
+int ipipe_request_tickdev(const char *devname,
+			  void (*emumode)(enum clock_event_mode mode,
+					  struct ipipe_tick_device *tdev),
+			  int (*emutick)(unsigned long evt,
+					 struct ipipe_tick_device *tdev),
+			  int cpu);
+
+void ipipe_release_tickdev(int cpu);
+
+#endif /* CONFIG_IPIPE && CONFIG_GENERIC_CLOCKEVENTS */
+
+#endif /* !__LINUX_IPIPE_TICKDEV_H */
Index: linux-2.6.23/include/linux/ipipe_trace.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/linux/ipipe_trace.h
@@ -0,0 +1,65 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_trace.h
+ *
+ * Copyright (C) 2005 Luotao Fu.
+ *               2005-2007 Jan Kiszka.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef _LINUX_IPIPE_TRACE_H
+#define _LINUX_IPIPE_TRACE_H
+
+#ifdef CONFIG_IPIPE
+
+#include <linux/types.h>
+
+void ipipe_trace_begin(unsigned long v);
+void ipipe_trace_end(unsigned long v);
+void ipipe_trace_freeze(unsigned long v);
+void ipipe_trace_special(unsigned char special_id, unsigned long v);
+void ipipe_trace_pid(pid_t pid, short prio);
+
+int ipipe_trace_max_reset(void);
+int ipipe_trace_frozen_reset(void);
+
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+#define ipipe_trace_irq_entry(irq)	ipipe_trace_begin(irq)
+#define ipipe_trace_irq_exit(irq)	ipipe_trace_end(irq)
+#define ipipe_trace_irqsoff()		ipipe_trace_begin(0x80000000UL)
+#define ipipe_trace_irqson()		ipipe_trace_end(0x80000000UL)
+#else
+#define ipipe_trace_irq_entry(irq)	do { } while(0)
+#define ipipe_trace_irq_exit(irq)	do { } while(0)
+#define ipipe_trace_irqsoff()		do { } while(0)
+#define ipipe_trace_irqson()		do { } while(0)
+#endif
+
+#endif /* CONFIG_IPIPE */
+
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+
+void ipipe_trace_panic_freeze(void);
+void ipipe_trace_panic_dump(void);
+
+#else /* !CONFIG_IPIPE_TRACE_PANIC */
+
+static inline void ipipe_trace_panic_freeze(void) { }
+static inline void ipipe_trace_panic_dump(void) { }
+
+#endif /* !CONFIG_IPIPE_TRACE_PANIC */
+
+#endif	/* !__LINUX_IPIPE_H */
Index: linux-2.6.23/include/linux/irq.h
===================================================================
--- linux-2.6.23.orig/include/linux/irq.h
+++ linux-2.6.23/include/linux/irq.h
@@ -150,6 +150,12 @@ struct irq_chip {
  * @name:		flow handler name for /proc/interrupts output
  */
 struct irq_desc {
+#ifdef CONFIG_IPIPE
+	void			fastcall (*ipipe_ack)(unsigned int irq,
+						      struct irq_desc *desc);
+	void			fastcall (*ipipe_end)(unsigned int irq,
+						      struct irq_desc *desc);
+#endif /* CONFIG_IPIPE */
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
 	struct msi_desc		*msi_desc;
Index: linux-2.6.23/include/linux/kernel.h
===================================================================
--- linux-2.6.23.orig/include/linux/kernel.h
+++ linux-2.6.23/include/linux/kernel.h
@@ -14,6 +14,7 @@
 #include <linux/compiler.h>
 #include <linux/bitops.h>
 #include <linux/log2.h>
+#include <linux/ipipe_base.h>
 #include <asm/byteorder.h>
 #include <asm/bug.h>
 
@@ -84,9 +85,12 @@ struct user;
  */
 #ifdef CONFIG_PREEMPT_VOLUNTARY
 extern int cond_resched(void);
-# define might_resched() cond_resched()
+# define might_resched() do { \
+		ipipe_check_context(ipipe_root_domain); \
+		cond_resched(); \
+	} while (0)
 #else
-# define might_resched() do { } while (0)
+# define might_resched() ipipe_check_context(ipipe_root_domain)
 #endif
 
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
Index: linux-2.6.23/include/linux/linkage.h
===================================================================
--- linux-2.6.23.orig/include/linux/linkage.h
+++ linux-2.6.23/include/linux/linkage.h
@@ -64,4 +64,8 @@
 #define fastcall
 #endif
 
+#ifndef notrace
+#define notrace		__attribute__((no_instrument_function))
+#endif
+
 #endif
Index: linux-2.6.23/include/linux/mm.h
===================================================================
--- linux-2.6.23.orig/include/linux/mm.h
+++ linux-2.6.23/include/linux/mm.h
@@ -168,6 +168,7 @@ extern unsigned int kobjsize(const void 
 #define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
 #define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
 #define VM_ALWAYSDUMP	0x04000000	/* Always include in core dumps */
+#define VM_PINNED	0x08000000	/* Disable faults for the vma */
 
 #define VM_CAN_NONLINEAR 0x08000000	/* Has ->fault & does nonlinear pages */
 
Index: linux-2.6.23/include/linux/preempt.h
===================================================================
--- linux-2.6.23.orig/include/linux/preempt.h
+++ linux-2.6.23/include/linux/preempt.h
@@ -8,6 +8,7 @@
 
 #include <linux/thread_info.h>
 #include <linux/linkage.h>
+#include <linux/ipipe_base.h>
 #include <linux/list.h>
 
 #ifdef CONFIG_DEBUG_PREEMPT
@@ -29,18 +30,21 @@ asmlinkage void preempt_schedule(void);
 
 #define preempt_disable() \
 do { \
+	ipipe_check_context(ipipe_root_domain); \
 	inc_preempt_count(); \
 	barrier(); \
 } while (0)
 
 #define preempt_enable_no_resched() \
 do { \
+	ipipe_check_context(ipipe_root_domain); \
 	barrier(); \
 	dec_preempt_count(); \
 } while (0)
 
 #define preempt_check_resched() \
 do { \
+	ipipe_check_context(ipipe_root_domain); \
 	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
 		preempt_schedule(); \
 } while (0)
@@ -54,10 +58,10 @@ do { \
 
 #else
 
-#define preempt_disable()		do { } while (0)
-#define preempt_enable_no_resched()	do { } while (0)
-#define preempt_enable()		do { } while (0)
-#define preempt_check_resched()		do { } while (0)
+#define preempt_disable()		ipipe_check_context(ipipe_root_domain)
+#define preempt_enable_no_resched()	ipipe_check_context(ipipe_root_domain)
+#define preempt_enable()		ipipe_check_context(ipipe_root_domain)
+#define preempt_check_resched()		ipipe_check_context(ipipe_root_domain)
 
 #endif
 
Index: linux-2.6.23/include/linux/sched.h
===================================================================
--- linux-2.6.23.orig/include/linux/sched.h
+++ linux-2.6.23/include/linux/sched.h
@@ -57,6 +57,7 @@ struct sched_param {
 #include <linux/cpumask.h>
 #include <linux/errno.h>
 #include <linux/nodemask.h>
+#include <linux/ipipe.h>
 
 #include <asm/system.h>
 #include <asm/semaphore.h>
@@ -175,6 +176,13 @@ print_cfs_rq(struct seq_file *m, int cpu
 /* in tsk->state again */
 #define TASK_NONINTERACTIVE	64
 #define TASK_DEAD		128
+#ifdef CONFIG_IPIPE
+#define TASK_ATOMICSWITCH	512
+#define TASK_NOWAKEUP          1024
+#else  /* !CONFIG_IPIPE */
+#define TASK_ATOMICSWITCH	0
+#define TASK_NOWAKEUP          0
+#endif /* CONFIG_IPIPE */
 
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
@@ -1184,6 +1192,9 @@ struct task_struct {
 
 	atomic_t fs_excl;	/* holding fs exclusive resources */
 	struct rcu_head rcu;
+#ifdef CONFIG_IPIPE
+	void *ptd[IPIPE_ROOT_NPTDKEYS];
+#endif
 
 	/*
 	 * cache last used pipe for splice
@@ -1334,6 +1345,11 @@ static inline void put_task_struct(struc
 #define PF_MEMPOLICY	0x10000000	/* Non-default NUMA mempolicy */
 #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
 #define PF_FREEZER_SKIP	0x40000000	/* Freezer should not count it as freezeable */
+#ifdef CONFIG_IPIPE
+#define PF_EVNOTIFY	0x80000000	/* Notify other domains about internal events */
+#else
+#define PF_EVNOTIFY	0
+#endif /* CONFIG_IPIPE */
 
 /*
  * Only the _current_ task can read/write to tsk->flags, but other
Index: linux-2.6.23/include/linux/spinlock.h
===================================================================
--- linux-2.6.23.orig/include/linux/spinlock.h
+++ linux-2.6.23/include/linux/spinlock.h
@@ -172,7 +172,90 @@ do {								\
 #define read_trylock(lock)		__cond_lock(lock, _read_trylock(lock))
 #define write_trylock(lock)		__cond_lock(lock, _write_trylock(lock))
 
-#define spin_lock(lock)			_spin_lock(lock)
+#undef TYPE_EQUAL
+#define TYPE_EQUAL(lock, type) \
+	__builtin_types_compatible_p(typeof(lock), type *)
+
+#define PICK_SPINOP(op, lock)						\
+do {									\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t))			\
+		__raw_spin##op(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
+	else if (TYPE_EQUAL(lock, spinlock_t))				\
+		_spin##op((spinlock_t *)(lock));			\
+} while (0)
+
+#define PICK_SPINOP_RAW(op, lock)					\
+do {									\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t))			\
+		__raw_spin##op(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
+	else if (TYPE_EQUAL(lock, spinlock_t))				\
+		__raw_spin##op(&((spinlock_t *)(lock))->raw_lock);	\
+} while (0)
+
+#define PICK_SPINLOCK_IRQ(lock)						\
+do {									\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) {			\
+		__ipipe_spin_lock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
+	} else if (TYPE_EQUAL(lock, spinlock_t))			\
+		_spin_lock_irq((spinlock_t *)(lock));			\
+} while (0)
+
+#define PICK_SPINUNLOCK_IRQ(lock)					\
+do {									\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) {			\
+		__ipipe_spin_unlock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
+	} else if (TYPE_EQUAL(lock, spinlock_t))			\
+		_spin_unlock_irq((spinlock_t *)(lock));			\
+} while (0)
+
+#define PICK_SPINLOCK_IRQ_RAW(lock)					\
+do {									\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) {			\
+		__ipipe_spin_lock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
+	} else if (TYPE_EQUAL(lock, spinlock_t))			\
+		local_irq_disable();					\
+		__raw_spin_lock(&((spinlock_t *)(lock))->raw_lock);	\
+} while (0)
+
+#define PICK_SPINUNLOCK_IRQ_RAW(lock)					\
+do {									\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) {			\
+		__ipipe_spin_unlock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
+	} else if (TYPE_EQUAL(lock, spinlock_t))			\
+		__raw_spin_unlock(&((spinlock_t *)(lock))->raw_lock);	\
+		local_irq_enable();					\
+} while (0)
+
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+extern int __bad_spinlock_type(void);
+
+#define PICK_SPINLOCK_IRQSAVE(lock, flags)				\
+do {									\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) {			\
+		(flags) = __ipipe_spin_lock_irqsave(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
+	} else if (TYPE_EQUAL(lock, spinlock_t))			\
+		flags = _spin_lock_irqsave((spinlock_t *)(lock));	\
+	else __bad_spinlock_type();					\
+} while (0)
+#else
+#define PICK_SPINLOCK_IRQSAVE(lock, flags)				\
+do {									\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) {			\
+		(flags) = __ipipe_spin_lock_irqsave(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
+	} else if (TYPE_EQUAL(lock, spinlock_t))			\
+		_spin_lock_irqsave((spinlock_t *)(lock), flags);	\
+} while (0)
+#endif
+
+#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags)				\
+	do {								\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) {			\
+		__ipipe_spin_unlock_irqrestore(&((__ipipe_spinlock_t *)(lock))->__raw_lock, flags); \
+	} else if (TYPE_EQUAL(lock, spinlock_t))			\
+		_spin_unlock_irqrestore((spinlock_t *)(lock), flags);	\
+} while (0)
+
+#define spin_lock(lock)	PICK_SPINOP(_lock, lock)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass)
@@ -185,7 +268,7 @@ do {								\
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 
-#define spin_lock_irqsave(lock, flags)	flags = _spin_lock_irqsave(lock)
+#define spin_lock_irqsave(lock, flags)	PICK_SPINLOCK_IRQSAVE(lock, flags)
 #define read_lock_irqsave(lock, flags)	flags = _read_lock_irqsave(lock)
 #define write_lock_irqsave(lock, flags)	flags = _write_lock_irqsave(lock)
 
@@ -199,7 +282,7 @@ do {								\
 
 #else
 
-#define spin_lock_irqsave(lock, flags)	_spin_lock_irqsave(lock, flags)
+#define spin_lock_irqsave(lock, flags)	PICK_SPINLOCK_IRQSAVE(lock, flags)
 #define read_lock_irqsave(lock, flags)	_read_lock_irqsave(lock, flags)
 #define write_lock_irqsave(lock, flags)	_write_lock_irqsave(lock, flags)
 #define spin_lock_irqsave_nested(lock, flags, subclass)	\
@@ -207,7 +290,7 @@ do {								\
 
 #endif
 
-#define spin_lock_irq(lock)		_spin_lock_irq(lock)
+#define spin_lock_irq(lock)		PICK_SPINLOCK_IRQ(lock)
 #define spin_lock_bh(lock)		_spin_lock_bh(lock)
 
 #define read_lock_irq(lock)		_read_lock_irq(lock)
@@ -221,32 +304,40 @@ do {								\
  */
 #if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \
 	!defined(CONFIG_SMP)
-# define spin_unlock(lock)		_spin_unlock(lock)
+#define spin_unlock(lock)		PICK_SPINOP(_unlock, lock)
 # define read_unlock(lock)		_read_unlock(lock)
 # define write_unlock(lock)		_write_unlock(lock)
-# define spin_unlock_irq(lock)		_spin_unlock_irq(lock)
-# define read_unlock_irq(lock)		_read_unlock_irq(lock)
-# define write_unlock_irq(lock)		_write_unlock_irq(lock)
-#else
-# define spin_unlock(lock) \
-    do {__raw_spin_unlock(&(lock)->raw_lock); __release(lock); } while (0)
-# define read_unlock(lock) \
-    do {__raw_read_unlock(&(lock)->raw_lock); __release(lock); } while (0)
-# define write_unlock(lock) \
-    do {__raw_write_unlock(&(lock)->raw_lock); __release(lock); } while (0)
-# define spin_unlock_irq(lock)			\
+# define spin_unlock_irq(lock)	PICK_SPINUNLOCK_IRQ(lock)
+# define read_unlock_irq(lock)	_read_unlock_irq(lock)
+# define write_unlock_irq(lock)	_write_unlock_irq(lock)
+#else
+# define spin_unlock(lock)			\
 do {						\
-	__raw_spin_unlock(&(lock)->raw_lock);	\
+	PICK_SPINOP_RAW(_unlock, lock); 	\
+	__release(lock);			\
+} while(0)
+# define read_unlock(lock)			\
+do {						\
+	__raw_read_unlock(&(lock)->raw_lock);	\
+	__release(lock);			\
+} while (0)
+# define write_unlock(lock)			\
+do {						\
+	__raw_write_unlock(&(lock)->raw_lock);	\
 	__release(lock);			\
-	local_irq_enable();			\
 } while (0)
-# define read_unlock_irq(lock)			\
+# define spin_unlock_irq(lock)		\
+do {						\
+	PICK_SPINUNLOCK_IRQ_RAW(lock);		\
+	__release(lock);			\
+} while(0)
+# define read_unlock_irq(lock)		\
 do {						\
 	__raw_read_unlock(&(lock)->raw_lock);	\
 	__release(lock);			\
 	local_irq_enable();			\
 } while (0)
-# define write_unlock_irq(lock)			\
+# define write_unlock_irq(lock)		\
 do {						\
 	__raw_write_unlock(&(lock)->raw_lock);	\
 	__release(lock);			\
@@ -254,8 +345,8 @@ do {						\
 } while (0)
 #endif
 
-#define spin_unlock_irqrestore(lock, flags) \
-					_spin_unlock_irqrestore(lock, flags)
+#define spin_unlock_irqrestore(lock, flags)	\
+					PICK_SPINUNLOCK_IRQRESTORE(lock, flags)
 #define spin_unlock_bh(lock)		_spin_unlock_bh(lock)
 
 #define read_unlock_irqrestore(lock, flags) \
@@ -346,4 +437,29 @@ extern int _atomic_dec_and_lock(atomic_t
  */
 #define spin_can_lock(lock)	(!spin_is_locked(lock))
 
+#ifdef CONFIG_IPIPE
+void fastcall __ipipe_spin_lock_irq(raw_spinlock_t *lock);
+void fastcall __ipipe_spin_unlock_irq(raw_spinlock_t *lock);
+unsigned long fastcall __ipipe_spin_lock_irqsave(raw_spinlock_t *lock);
+void fastcall __ipipe_spin_unlock_irqrestore(raw_spinlock_t *lock,
+					     unsigned long x);
+void fastcall __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock);
+void fastcall __ipipe_spin_unlock_irqcomplete(unsigned long x);
+#define spin_lock_irqsave_cond(lock, flags) \
+	spin_lock_irqsave(lock, flags)
+#define spin_unlock_irqrestore_cond(lock, flags) \
+	spin_unlock_irqrestore(lock, flags)
+#else
+#define spin_lock_irqsave_cond(lock, flags) \
+	do { (void)(flags); spin_lock(lock); } while(0)
+#define spin_unlock_irqrestore_cond(lock, flags) \
+	spin_unlock(lock)
+#define __ipipe_spin_lock_irq(lock)		do { } while(0)
+#define __ipipe_spin_unlock_irq(lock)		do { } while(0)
+#define __ipipe_spin_lock_irqsave(lock)		0
+#define __ipipe_spin_unlock_irqrestore(lock, x)	do { (void)(x); } while(0)
+#define __ipipe_spin_unlock_irqbegin(lock)	do { } while(0)
+#define __ipipe_spin_unlock_irqcomplete(x)	do { (void)(x); } while(0)
+#endif
+
 #endif /* __LINUX_SPINLOCK_H */
Index: linux-2.6.23/include/linux/spinlock_types.h
===================================================================
--- linux-2.6.23.orig/include/linux/spinlock_types.h
+++ linux-2.6.23/include/linux/spinlock_types.h
@@ -31,6 +31,10 @@ typedef struct {
 #endif
 } spinlock_t;
 
+typedef struct {
+	raw_spinlock_t __raw_lock;
+} __ipipe_spinlock_t;
+
 #define SPINLOCK_MAGIC		0xdead4ead
 
 typedef struct {
@@ -92,9 +96,19 @@ typedef struct {
  * __SPIN_LOCK_UNLOCKED()/__RW_LOCK_UNLOCKED() as appropriate.
  */
 #define SPIN_LOCK_UNLOCKED	__SPIN_LOCK_UNLOCKED(old_style_spin_init)
+#define IPIPE_SPIN_LOCK_UNLOCKED					\
+	(__ipipe_spinlock_t) {	.__raw_lock = __RAW_SPIN_LOCK_UNLOCKED }
 #define RW_LOCK_UNLOCKED	__RW_LOCK_UNLOCKED(old_style_rw_init)
 
 #define DEFINE_SPINLOCK(x)	spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
 #define DEFINE_RWLOCK(x)	rwlock_t x = __RW_LOCK_UNLOCKED(x)
 
+#ifdef CONFIG_IPIPE
+# define ipipe_spinlock_t	__ipipe_spinlock_t
+# define IPIPE_DEFINE_SPINLOCK(x) ipipe_spinlock_t x = IPIPE_SPIN_LOCK_UNLOCKED
+#else
+# define ipipe_spinlock_t	spinlock_t
+# define IPIPE_DEFINE_SPINLOCK(x) DEFINE_SPINLOCK(x)
+#endif
+
 #endif /* __LINUX_SPINLOCK_TYPES_H */
Index: linux-2.6.23/init/Kconfig
===================================================================
--- linux-2.6.23.orig/init/Kconfig
+++ linux-2.6.23/init/Kconfig
@@ -64,6 +64,7 @@ config INIT_ENV_ARG_LIMIT
 
 config LOCALVERSION
 	string "Local version - append to kernel release"
+	default "-ipipe"
 	help
 	  Append an extra string to the end of your kernel version.
 	  This will show up when you type uname, for example.
@@ -668,3 +669,36 @@ config STOP_MACHINE
 	  Need stop_machine() primitive.
 
 source "block/Kconfig"
+
+menu "Real-time sub-system"
+
+comment "WARNING! You enabled APM, CPU Frequency scaling or ACPI 'processor'"
+	depends on APM || CPU_FREQ || ACPI_PROCESSOR
+comment "option. These options are known to cause troubles with Xenomai."
+	depends on APM || CPU_FREQ || ACPI_PROCESSOR
+
+comment "NOTE: Xenomai conflicts with PC speaker support."
+	depends on !X86_TSC && X86 && INPUT_PCSPKR
+comment "(menu Device Drivers/Input device support/Miscellaneous devices)"
+	depends on !X86_TSC && X86 && INPUT_PCSPKR
+
+comment "NOTE: Xenomai conflicts with HPET Timer support."
+	depends on !X86_LOCAL_APIC && X86 && HPET_TIMER
+comment "(menu Processor type and features/HPET Timer Support)"
+	depends on !X86_LOCAL_APIC && X86 && HPET_TIMER
+
+config XENOMAI
+	depends on ((X86_TSC || !X86 || !INPUT_PCSPKR) && (!HPET_TIMER || !X86 || X86_LOCAL_APIC))
+	bool "Xenomai"
+	default y
+        select IPIPE
+
+        help
+          Xenomai is a real-time extension to the Linux kernel. Note
+          that Xenomai relies on Adeos interrupt pipeline (CONFIG_IPIPE
+          option) to be enabled, so enabling this option selects the
+          CONFIG_IPIPE option.
+
+source "arch/i386/xenomai/Kconfig"
+
+endmenu
Index: linux-2.6.23/init/main.c
===================================================================
--- linux-2.6.23.orig/init/main.c
+++ linux-2.6.23/init/main.c
@@ -524,7 +524,7 @@ asmlinkage void __init start_kernel(void
 	unwind_init();
 	lockdep_init();
 
-	local_irq_disable();
+	local_irq_disable_hw();
 	early_boot_irqs_off();
 	early_init_irq_lock_class();
 
@@ -577,6 +577,11 @@ asmlinkage void __init start_kernel(void
 	softirq_init();
 	timekeeping_init();
 	time_init();
+	/*
+	 * We need to wait for the interrupt and time subsystems to be
+	 * initialized before enabling the pipeline.
+	 */
+ 	ipipe_init();
 	profile_init();
 	if (!irqs_disabled())
 		printk("start_kernel(): bug: interrupts were enabled early\n");
@@ -737,6 +742,7 @@ static void __init do_basic_setup(void)
 	usermodehelper_init();
 	driver_init();
 	init_irq_proc();
+ 	ipipe_init_proc();
 	do_initcalls();
 }
 
Index: linux-2.6.23/kernel/Makefile
===================================================================
--- linux-2.6.23.orig/kernel/Makefile
+++ linux-2.6.23/kernel/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RELAY) += relay.o
+obj-$(CONFIG_IPIPE) += ipipe/
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
@@ -74,3 +75,5 @@ quiet_cmd_ikconfiggz = IKCFG   $@
 targets += config_data.h
 $(obj)/config_data.h: $(obj)/config_data.gz FORCE
 	$(call if_changed,ikconfiggz)
+
+obj-$(CONFIG_XENOMAI) += xenomai/
Index: linux-2.6.23/kernel/exit.c
===================================================================
--- linux-2.6.23.orig/kernel/exit.c
+++ linux-2.6.23/kernel/exit.c
@@ -973,6 +973,7 @@ fastcall NORET_TYPE void do_exit(long co
 
 	if (group_dead)
 		acct_process();
+ 	ipipe_exit_notify(tsk);
 	exit_sem(tsk);
 	__exit_files(tsk);
 	__exit_fs(tsk);
Index: linux-2.6.23/kernel/fork.c
===================================================================
--- linux-2.6.23.orig/kernel/fork.c
+++ linux-2.6.23/kernel/fork.c
@@ -392,6 +392,7 @@ void mmput(struct mm_struct *mm)
 	if (atomic_dec_and_test(&mm->mm_users)) {
 		exit_aio(mm);
 		exit_mmap(mm);
+		ipipe_cleanup_notify(mm);
 		if (!list_empty(&mm->mmlist)) {
 			spin_lock(&mmlist_lock);
 			list_del(&mm->mmlist);
@@ -925,7 +926,7 @@ static inline void copy_flags(unsigned l
 {
 	unsigned long new_flags = p->flags;
 
-	new_flags &= ~PF_SUPERPRIV;
+	new_flags &= ~(PF_SUPERPRIV | PF_EVNOTIFY);
 	new_flags |= PF_FORKNOEXEC;
 	if (!(clone_flags & CLONE_PTRACE))
 		p->ptrace = 0;
@@ -1274,6 +1275,14 @@ static struct task_struct *copy_process(
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
+#ifdef CONFIG_IPIPE
+	{
+	int k;
+
+	for (k = 0; k < IPIPE_ROOT_NPTDKEYS; k++)
+		p->ptd[k] = NULL;
+	}
+#endif /* CONFIG_IPIPE */
 	return p;
 
 bad_fork_cleanup_namespaces:
Index: linux-2.6.23/kernel/ipipe/Kconfig
===================================================================
--- /dev/null
+++ linux-2.6.23/kernel/ipipe/Kconfig
@@ -0,0 +1,25 @@
+config IPIPE
+	bool "Interrupt pipeline"
+	default y
+	---help---
+	  Activate this option if you want the interrupt pipeline to be
+	  compiled in.
+
+config IPIPE_DOMAINS
+	int "Max domains"
+	depends on IPIPE
+	default 4
+	---help---
+	The maximum number of I-pipe domains to run concurrently.
+
+config IPIPE_COMPAT
+	bool "Maintain code compatibility with older releases"
+	depends on IPIPE
+	default y
+	---help---
+	Activate this option if you want the compatibility code to be
+	defined, so that older I-pipe clients may use obsolete
+	constructs. WARNING: obsolete code will be eventually
+	deprecated in future I-pipe releases, and removed from the
+	compatibility support as time passes. Please fix I-pipe
+	clients to get rid of such uses as soon as possible.
Index: linux-2.6.23/kernel/ipipe/Kconfig.debug
===================================================================
--- /dev/null
+++ linux-2.6.23/kernel/ipipe/Kconfig.debug
@@ -0,0 +1,88 @@
+config IPIPE_DEBUG
+	bool "I-pipe debugging"
+	depends on IPIPE
+
+config IPIPE_DEBUG_CONTEXT
+	bool "Check for illicit cross-domain calls"
+	depends on IPIPE_DEBUG
+	default y
+	---help---
+	  Enable this feature to arm checkpoints in the kernel that
+	  verify the correct invocation context. On entry of critical
+	  Linux services a warning is issued if the caller is not
+	  running over the root domain.
+
+config IPIPE_TRACE
+	bool "Latency tracing"
+	depends on IPIPE_DEBUG
+	select FRAME_POINTER
+	select KALLSYMS
+	select PROC_FS
+	---help---
+	  Activate this option if you want to use per-function tracing of
+	  the kernel. The tracer will collect data via instrumentation
+	  features like the one below or with the help of explicite calls
+	  of ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the
+	  in-kernel tracing API. The collected data and runtime control
+	  is available via /proc/ipipe/trace/*.
+
+if IPIPE_TRACE
+
+config IPIPE_TRACE_ENABLE
+	bool "Enable tracing on boot"
+	default y
+	---help---
+	  Disable this option if you want to arm the tracer after booting
+	  manually ("echo 1 > /proc/ipipe/tracer/enable"). This can reduce
+	  boot time on slow embedded devices due to the tracer overhead.
+
+config IPIPE_TRACE_MCOUNT
+	bool "Instrument function entries"
+	default y
+	---help---
+	  When enabled, records every kernel function entry in the tracer
+	  log. While this slows down the system noticeably, it provides
+	  the highest level of information about the flow of events.
+	  However, it can be switch off in order to record only explicit
+	  I-pipe trace points.
+
+config IPIPE_TRACE_IRQSOFF
+	bool "Trace IRQs-off times"
+	default y
+	---help---
+	  Activate this option if I-pipe shall trace the longest path
+	  with hard-IRQs switched off.
+
+config IPIPE_TRACE_SHIFT
+	int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)"
+	range 10 18
+	default 14
+	---help---
+	  The number of trace points to hold tracing data for each
+	  trace path, as a power of 2.
+
+config IPIPE_TRACE_VMALLOC
+	bool "Use vmalloc'ed trace buffer"
+	default y if EMBEDDED
+	---help---
+	  Instead of reserving static kernel data, the required buffer
+	  is allocated via vmalloc during boot-up when this option is
+	  enabled. This can help to start systems that are low on memory,
+	  but it slightly degrades overall performance. Try this option
+	  when a traced kernel hangs unexpectedly at boot time.
+
+config IPIPE_TRACE_PANIC
+	bool "Enable panic back traces"
+	default y
+	---help---
+	  Provides services to freeze and dump a back trace on panic
+	  situations. This is used on IPIPE_DEBUG_CONTEXT exceptions
+	  as well as ordinary kernel oopses. You can control the number
+	  of printed back trace points via /proc/ipipe/trace.
+
+config IPIPE_TRACE_ENABLE_VALUE
+	int
+	default 0 if !IPIPE_TRACE_ENABLE
+	default 1 if IPIPE_TRACE_ENABLE
+
+endif
Index: linux-2.6.23/kernel/ipipe/Makefile
===================================================================
--- /dev/null
+++ linux-2.6.23/kernel/ipipe/Makefile
@@ -0,0 +1,3 @@
+
+obj-$(CONFIG_IPIPE)	+= core.o
+obj-$(CONFIG_IPIPE_TRACE) += tracer.o
Index: linux-2.6.23/kernel/ipipe/core.c
===================================================================
--- /dev/null
+++ linux-2.6.23/kernel/ipipe/core.c
@@ -0,0 +1,1623 @@
+/* -*- linux-c -*-
+ * linux/kernel/ipipe/core.c
+ *
+ * Copyright (C) 2002-2005 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Architecture-independent I-PIPE core support.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched.h>
+#include <linux/kallsyms.h>
+#include <linux/interrupt.h>
+#include <linux/bitops.h>
+#include <linux/tick.h>
+#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#endif	/* CONFIG_PROC_FS */
+#include <linux/ipipe_trace.h>
+#include <linux/ipipe_tickdev.h>
+
+static int __ipipe_ptd_key_count;
+
+static unsigned long __ipipe_ptd_key_map;
+
+static unsigned long __ipipe_domain_slot_map;
+
+struct ipipe_domain ipipe_root;
+
+#ifndef CONFIG_SMP
+/*
+ * Create an alias to the unique root status, so that arch-dep code
+ * may get simple and easy access to this percpu variable.  We also
+ * create an array of pointers to the percpu domain data; this tends
+ * to produce a better code when reaching non-root domains. We make
+ * sure that the early boot code would be able to dereference the
+ * pointer to the root domain data safely by statically initializing
+ * its value (local_irq*() routines depend on this).
+ */
+#if __GNUC__ >= 4
+extern unsigned long __ipipe_root_status
+__attribute__((alias(__stringify(__raw_get_cpu_var(ipipe_percpu_darray)))));
+EXPORT_SYMBOL(__ipipe_root_status);
+#else /* __GNUC__ < 4 */
+/*
+ * Work around a GCC 3.x issue making alias symbols unusable as
+ * constant initializers.
+ */
+unsigned long *const __ipipe_root_status_addr = &__raw_get_cpu_var(ipipe_percpu_darray);
+EXPORT_SYMBOL(__ipipe_root_status_addr);
+#endif /* __GNUC__ < 4 */
+
+DEFINE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]) =
+{ [0] = (struct ipipe_percpu_domain_data *)&__raw_get_cpu_var(ipipe_percpu_darray) };
+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_daddr);
+#endif /* !CONFIG_SMP */
+
+DEFINE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]) =
+{ [0] = { .status = IPIPE_STALL_MASK } }; /* Root domain stalled on each CPU at startup. */
+
+DEFINE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain) = { &ipipe_root };
+
+static IPIPE_DEFINE_SPINLOCK(__ipipe_pipelock);
+
+LIST_HEAD(__ipipe_pipeline);
+
+unsigned long __ipipe_virtual_irq_map;
+
+#ifdef CONFIG_PRINTK
+unsigned __ipipe_printk_virq;
+#endif /* CONFIG_PRINTK */
+
+int __ipipe_event_monitors[IPIPE_NR_EVENTS];
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+
+DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
+
+static DEFINE_PER_CPU(struct ipipe_tick_device, ipipe_tick_cpu_device);
+
+static void __ipipe_set_tick_mode(enum clock_event_mode mode,
+				  struct clock_event_device *cdev)
+{
+	struct ipipe_tick_device *itd;
+	itd = &per_cpu(ipipe_tick_cpu_device, smp_processor_id());
+	itd->emul_set_mode(mode, itd);
+}
+
+static int __ipipe_set_next_tick(unsigned long evt,
+				 struct clock_event_device *cdev)
+{
+	uint64_t delta_ns = (uint64_t)cdev->delta;
+	struct ipipe_tick_device *itd;
+
+	if (delta_ns > ULONG_MAX)
+		delta_ns = ULONG_MAX;
+
+	itd = &per_cpu(ipipe_tick_cpu_device, smp_processor_id());
+	return itd->emul_set_tick((unsigned long)delta_ns, itd);
+}
+
+int ipipe_request_tickdev(const char *devname,
+			  void (*emumode)(enum clock_event_mode mode,
+					  struct ipipe_tick_device *tdev),
+			  int (*emutick)(unsigned long delta,
+					 struct ipipe_tick_device *tdev),
+			  int cpu)
+{
+	struct ipipe_tick_device *itd;
+	struct tick_device *slave;
+	unsigned long flags;
+	int status;
+
+	flags = ipipe_critical_enter(NULL);
+
+	itd = &per_cpu(ipipe_tick_cpu_device, cpu);
+
+	if (itd->slave != NULL) {
+		status = -EBUSY;
+		goto out;
+	}
+
+	slave = &per_cpu(tick_cpu_device, cpu);
+
+	if (strcmp(slave->evtdev->name, devname)) {
+		/*
+		 * No conflict so far with the current tick device,
+		 * check whether the requested device is sane and has
+		 * been blessed by the kernel.
+		 */
+		status = __ipipe_check_tickdev(devname) ?
+			CLOCK_EVT_MODE_UNUSED : CLOCK_EVT_MODE_SHUTDOWN;
+		goto out;
+	}
+
+	/*
+	 * Our caller asks for using the same clock event device for
+	 * ticking than we do, let's create a tick emulation device to
+	 * interpose on the set_next_event() method, so that we may
+	 * both manage the device in oneshot mode. Only the tick
+	 * emulation code will actually program the clockchip hardware
+	 * for the next shot, though.
+	 *
+	 * CAUTION: we still have to grab the tick device even when it
+	 * current runs in periodic mode, since the kernel may switch
+	 * to oneshot dynamically (highres/no_hz tick mode).
+	 */
+
+	itd->slave = slave;
+	itd->emul_set_mode = emumode;
+	itd->emul_set_tick = emutick;
+	itd->real_set_mode = slave->evtdev->set_mode;
+	itd->real_set_tick = slave->evtdev->set_next_event;
+	slave->evtdev->set_mode = __ipipe_set_tick_mode;
+	slave->evtdev->set_next_event = __ipipe_set_next_tick;
+	status = slave->evtdev->mode;
+out:
+	ipipe_critical_exit(flags);
+
+	return status;
+}
+
+void ipipe_release_tickdev(int cpu)
+{
+	struct ipipe_tick_device *itd;
+	struct tick_device *slave;
+	unsigned long flags;
+
+	flags = ipipe_critical_enter(NULL);
+
+	itd = &per_cpu(ipipe_tick_cpu_device, cpu);
+
+	if (itd->slave != NULL) {
+		slave = &per_cpu(tick_cpu_device, cpu);
+		slave->evtdev->set_mode = itd->real_set_mode;
+		slave->evtdev->set_next_event = itd->real_set_tick;
+		itd->slave = NULL;
+	}
+
+	ipipe_critical_exit(flags);
+}
+
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
+
+/*
+ * ipipe_init() -- Initialization routine of the IPIPE layer. Called
+ * by the host kernel early during the boot procedure.
+ */
+void __init ipipe_init(void)
+{
+	struct ipipe_domain *ipd = &ipipe_root;
+
+	__ipipe_check_platform();	/* Do platform dependent checks first. */
+
+	/*
+	 * A lightweight registration code for the root domain. We are
+	 * running on the boot CPU, hw interrupts are off, and
+	 * secondary CPUs are still lost in space.
+	 */
+
+	/* Reserve percpu data slot #0 for the root domain. */
+	ipd->slot = 0;
+	set_bit(0, &__ipipe_domain_slot_map);
+
+	ipd->name = "Linux";
+	ipd->domid = IPIPE_ROOT_ID;
+	ipd->priority = IPIPE_ROOT_PRIO;
+
+	__ipipe_init_stage(ipd);
+
+	INIT_LIST_HEAD(&ipd->p_link);
+	list_add_tail(&ipd->p_link, &__ipipe_pipeline);
+
+	__ipipe_init_platform();
+
+#ifdef CONFIG_PRINTK
+	__ipipe_printk_virq = ipipe_alloc_virq();	/* Cannot fail here. */
+	ipd->irqs[__ipipe_printk_virq].handler = &__ipipe_flush_printk;
+	ipd->irqs[__ipipe_printk_virq].cookie = NULL;
+	ipd->irqs[__ipipe_printk_virq].acknowledge = NULL;
+	ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK;
+#endif /* CONFIG_PRINTK */
+
+	__ipipe_enable_pipeline();
+
+	printk(KERN_INFO "I-pipe %s: pipeline enabled.\n",
+	       IPIPE_VERSION_STRING);
+}
+
+void __ipipe_init_stage(struct ipipe_domain *ipd)
+{
+	int cpu, n;
+
+	for_each_online_cpu(cpu) {
+
+		ipipe_percpudom(ipd, irqpend_himask, cpu) = 0;
+
+		for (n = 0; n < IPIPE_IRQ_IWORDS; n++) {
+			ipipe_percpudom(ipd, irqpend_lomask, cpu)[n] = 0;
+			ipipe_percpudom(ipd, irqheld_mask, cpu)[n] = 0;
+		}
+
+		for (n = 0; n < IPIPE_NR_IRQS; n++)
+			ipipe_percpudom(ipd, irqall, cpu)[n] = 0;
+
+		ipipe_percpudom(ipd, evsync, cpu) = 0;
+	}
+
+	for (n = 0; n < IPIPE_NR_IRQS; n++) {
+		ipd->irqs[n].acknowledge = NULL;
+		ipd->irqs[n].handler = NULL;
+		ipd->irqs[n].control = IPIPE_PASS_MASK;	/* Pass but don't handle */
+	}
+
+	for (n = 0; n < IPIPE_NR_EVENTS; n++)
+		ipd->evhand[n] = NULL;
+
+	ipd->evself = 0LL;
+	mutex_init(&ipd->mutex);
+
+	__ipipe_hook_critical_ipi(ipd);
+}
+
+void __ipipe_cleanup_domain(struct ipipe_domain *ipd)
+{
+	ipipe_unstall_pipeline_from(ipd);
+
+#ifdef CONFIG_SMP
+	{
+		int cpu;
+
+		for_each_online_cpu(cpu) {
+			while (ipipe_percpudom(ipd, irqpend_himask, cpu) != 0)
+				cpu_relax();
+		}
+	}
+#else
+	__raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = NULL;
+#endif
+
+	clear_bit(ipd->slot, &__ipipe_domain_slot_map);
+}
+
+void __ipipe_unstall_root(void)
+{
+	BUG_ON(!ipipe_root_domain_p);
+
+        local_irq_disable_hw();
+
+        __clear_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status));
+
+        if (unlikely(ipipe_root_cpudom_var(irqpend_himask) != 0))
+                __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+
+        local_irq_enable_hw();
+}
+
+void __ipipe_restore_root(unsigned long x)
+{
+	BUG_ON(!ipipe_root_domain_p);
+
+	if (x)
+		__ipipe_stall_root();
+	else
+		__ipipe_unstall_root();
+}
+
+void fastcall ipipe_stall_pipeline_from(struct ipipe_domain *ipd)
+{
+	set_bit_safe(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
+
+	if (__ipipe_pipeline_head_p(ipd))
+		local_irq_disable_hw();
+}
+
+unsigned long fastcall ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd)
+{
+	unsigned long x;
+
+	x = test_and_set_bit_safe(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
+
+	if (__ipipe_pipeline_head_p(ipd))
+		local_irq_disable_hw();
+
+	return x;
+}
+
+/*
+ * ipipe_unstall_pipeline_from() -- Unstall the pipeline and
+ * synchronize pending interrupts for a given domain. See
+ * __ipipe_walk_pipeline() for more information.
+ */
+void fastcall ipipe_unstall_pipeline_from(struct ipipe_domain *ipd)
+{
+	struct list_head *pos;
+	unsigned long flags;
+
+	local_irq_save_hw(flags);
+
+	__clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
+
+	if (ipd == ipipe_current_domain)
+		pos = &ipd->p_link;
+	else
+		pos = __ipipe_pipeline.next;
+
+	__ipipe_walk_pipeline(pos);
+
+	if (likely(__ipipe_pipeline_head_p(ipd)))
+		local_irq_enable_hw();
+	else
+		local_irq_restore_hw(flags);
+}
+
+unsigned long fastcall ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd)
+{
+	unsigned long x;
+
+	x = test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
+	ipipe_unstall_pipeline_from(ipd);
+
+	return x;
+}
+
+void fastcall ipipe_restore_pipeline_from(struct ipipe_domain *ipd,
+					  unsigned long x)
+{
+	if (x)
+		ipipe_stall_pipeline_from(ipd);
+	else
+		ipipe_unstall_pipeline_from(ipd);
+}
+
+void ipipe_unstall_pipeline_head(void)
+{
+	struct ipipe_domain *head_domain;
+
+	local_irq_disable_hw();
+
+	head_domain = __ipipe_pipeline_head();
+	__clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status));
+
+	if (unlikely(ipipe_cpudom_var(head_domain, irqpend_himask) != 0)) {
+		if (likely(head_domain == ipipe_current_domain))
+			__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+		else
+			__ipipe_walk_pipeline(&head_domain->p_link);
+        }
+
+	local_irq_enable_hw();
+}
+
+void fastcall __ipipe_restore_pipeline_head(struct ipipe_domain *head_domain, unsigned long x)
+{
+	local_irq_disable_hw();
+
+	if (x) {
+#ifdef CONFIG_DEBUG_KERNEL
+		static int warned;
+		if (!warned && test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status))) {
+			/*
+			 * Already stalled albeit ipipe_restore_pipeline_head()
+			 * should have detected it? Send a warning once.
+			 */
+			warned = 1;
+			printk(KERN_WARNING
+				   "I-pipe: ipipe_restore_pipeline_head() optimization failed.\n");
+			dump_stack();
+		}
+#else /* !CONFIG_DEBUG_KERNEL */
+		set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status));
+#endif /* CONFIG_DEBUG_KERNEL */
+	}
+	else {
+		__clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status));
+		if (unlikely(ipipe_cpudom_var(head_domain, irqpend_himask) != 0)) {
+			if (likely(head_domain == ipipe_current_domain))
+				__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+			else
+				__ipipe_walk_pipeline(&head_domain->p_link);
+		}
+		local_irq_enable_hw();
+	}
+}
+
+void fastcall __ipipe_spin_lock_irq(raw_spinlock_t *lock)
+{
+	local_irq_disable_hw();
+	__raw_spin_lock(lock);
+	__set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+}
+
+void fastcall __ipipe_spin_unlock_irq(raw_spinlock_t *lock)
+{
+	__raw_spin_unlock(lock);
+	__clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+	local_irq_enable_hw();
+}
+
+unsigned long fastcall __ipipe_spin_lock_irqsave(raw_spinlock_t *lock)
+{
+	unsigned long flags;
+	int s;
+
+	local_irq_save_hw(flags);
+	__raw_spin_lock(lock);
+	s = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+
+	return raw_mangle_irq_bits(s, flags);
+}
+
+void fastcall __ipipe_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long x)
+{
+	__raw_spin_unlock(lock);
+	if (!raw_demangle_irq_bits(&x))
+		__clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+	local_irq_restore_hw(x);
+}
+
+void fastcall __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock)
+{
+	__raw_spin_unlock(&lock->__raw_lock);
+}
+
+void fastcall __ipipe_spin_unlock_irqcomplete(unsigned long x)
+{
+	if (!raw_demangle_irq_bits(&x))
+		__clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+	local_irq_restore_hw(x);
+}
+
+/* Must be called hw IRQs off. */
+void fastcall __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq)
+{
+	int level = irq >> IPIPE_IRQ_ISHIFT, rank = irq & IPIPE_IRQ_IMASK;
+
+	if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) {
+		__set_bit(rank, &ipipe_cpudom_var(ipd, irqpend_lomask)[level]);
+		__set_bit(level,&ipipe_cpudom_var(ipd, irqpend_himask));
+	} else
+		__set_bit(rank, &ipipe_cpudom_var(ipd, irqheld_mask)[level]);
+
+	ipipe_cpudom_var(ipd, irqall)[irq]++;
+}
+
+/* Must be called hw IRQs off. */
+void fastcall __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq)
+{
+	if (likely(!test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) {
+		int level = irq >> IPIPE_IRQ_ISHIFT, rank = irq & IPIPE_IRQ_IMASK;
+		if (__test_and_clear_bit(rank, &ipipe_percpudom(ipd, irqpend_lomask, cpu)[level]))
+			__set_bit(rank, &ipipe_cpudom_var(ipd, irqheld_mask)[level]);
+		if (ipipe_percpudom(ipd, irqpend_lomask, cpu)[level] == 0)
+			__clear_bit(level, &ipipe_percpudom(ipd, irqpend_himask, cpu));
+	}
+}
+
+/* Must be called hw IRQs off. */
+void fastcall __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq)
+{
+	int cpu;
+
+	if (likely(test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) {
+		int level = irq >> IPIPE_IRQ_ISHIFT, rank = irq & IPIPE_IRQ_IMASK;
+		for_each_online_cpu(cpu) {
+			if (test_and_clear_bit(rank, &ipipe_percpudom(ipd, irqheld_mask, cpu)[level])) {
+				/* We need atomic ops here: */
+				set_bit(rank, &ipipe_percpudom(ipd, irqpend_lomask, cpu)[level]);
+				set_bit(level, &ipipe_percpudom(ipd, irqpend_himask, cpu));
+			}
+		}
+	}
+}
+
+/* __ipipe_walk_pipeline(): Plays interrupts pending in the log. Must
+   be called with local hw interrupts disabled. */
+
+void fastcall __ipipe_walk_pipeline(struct list_head *pos)
+{
+	struct ipipe_domain *this_domain = ipipe_current_domain, *next_domain;
+
+	while (pos != &__ipipe_pipeline) {
+
+		next_domain = list_entry(pos, struct ipipe_domain, p_link);
+
+		if (test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(next_domain, status)))
+			break;	/* Stalled stage -- do not go further. */
+
+		if (ipipe_cpudom_var(next_domain, irqpend_himask) != 0) {
+
+			if (next_domain == this_domain)
+				__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+			else {
+
+				ipipe_cpudom_var(this_domain, evsync) = 0;
+				ipipe_current_domain = next_domain;
+				ipipe_suspend_domain();	/* Sync stage and propagate interrupts. */
+
+				if (ipipe_current_domain == next_domain)
+					ipipe_current_domain = this_domain;
+				/*
+				 * Otherwise, something changed the current domain under our
+				 * feet recycling the register set; do not override the new
+				 * domain.
+				 */
+
+				if (ipipe_cpudom_var(this_domain, irqpend_himask) != 0 &&
+				    !test_bit(IPIPE_STALL_FLAG,
+					      &ipipe_cpudom_var(this_domain, status)))
+					__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+			}
+
+			break;
+		} else if (next_domain == this_domain)
+			break;
+
+		pos = next_domain->p_link.next;
+	}
+}
+
+/*
+ * ipipe_suspend_domain() -- Suspend the current domain, switching to
+ * the next one which has pending work down the pipeline.
+ */
+void ipipe_suspend_domain(void)
+{
+	struct ipipe_domain *this_domain, *next_domain;
+	struct list_head *ln;
+	unsigned long flags;
+
+	local_irq_save_hw(flags);
+
+	this_domain = next_domain = ipipe_current_domain;
+
+	__clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(this_domain, status));
+
+	if (ipipe_cpudom_var(this_domain, irqpend_himask) != 0)
+		goto sync_stage;
+
+	for (;;) {
+		ln = next_domain->p_link.next;
+
+		if (ln == &__ipipe_pipeline)
+			break;
+
+		next_domain = list_entry(ln, struct ipipe_domain, p_link);
+
+		if (test_bit(IPIPE_STALL_FLAG,
+			     &ipipe_cpudom_var(next_domain, status)) != 0)
+			break;
+
+		if (ipipe_cpudom_var(next_domain, irqpend_himask) == 0)
+			continue;
+
+		ipipe_current_domain = next_domain;
+
+sync_stage:
+		__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+
+		if (ipipe_current_domain != next_domain)
+			/*
+			 * Something has changed the current domain under our
+			 * feet, recycling the register set; take note.
+			 */
+			this_domain = ipipe_current_domain;
+	}
+
+	ipipe_current_domain = this_domain;
+
+	local_irq_restore_hw(flags);
+}
+
+/* ipipe_alloc_virq() -- Allocate a pipelined virtual/soft interrupt.
+ * Virtual interrupts are handled in exactly the same way than their
+ * hw-generated counterparts wrt pipelining.
+ */
+unsigned ipipe_alloc_virq(void)
+{
+	unsigned long flags, irq = 0;
+	int ipos;
+
+	spin_lock_irqsave(&__ipipe_pipelock, flags);
+
+	if (__ipipe_virtual_irq_map != ~0) {
+		ipos = ffz(__ipipe_virtual_irq_map);
+		set_bit(ipos, &__ipipe_virtual_irq_map);
+		irq = ipos + IPIPE_VIRQ_BASE;
+	}
+
+	spin_unlock_irqrestore(&__ipipe_pipelock, flags);
+
+	return irq;
+}
+
+/* ipipe_virtualize_irq() -- Attach a handler (and optionally a hw
+   acknowledge routine) to an interrupt for a given domain. */
+
+int ipipe_virtualize_irq(struct ipipe_domain *ipd,
+			 unsigned irq,
+			 ipipe_irq_handler_t handler,
+			 void *cookie,
+			 ipipe_irq_ackfn_t acknowledge,
+			 unsigned modemask)
+{
+	unsigned long flags;
+	int err;
+
+	if (irq >= IPIPE_NR_IRQS)
+		return -EINVAL;
+
+	if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK)
+		return -EPERM;
+
+	if (!test_bit(IPIPE_AHEAD_FLAG, &ipd->flags))
+		/* Silently unwire interrupts for non-heading domains. */
+		modemask &= ~IPIPE_WIRED_MASK;
+
+	spin_lock_irqsave(&__ipipe_pipelock, flags);
+
+	if (handler != NULL) {
+		if (handler == IPIPE_SAME_HANDLER) {
+			handler = ipd->irqs[irq].handler;
+			cookie = ipd->irqs[irq].cookie;
+
+			if (handler == NULL) {
+				err = -EINVAL;
+				goto unlock_and_exit;
+			}
+		} else if ((modemask & IPIPE_EXCLUSIVE_MASK) != 0 &&
+			   ipd->irqs[irq].handler != NULL) {
+			err = -EBUSY;
+			goto unlock_and_exit;
+		}
+
+		/* Wired interrupts can only be delivered to domains
+		 * always heading the pipeline, and using dynamic
+		 * propagation. */
+
+		if ((modemask & IPIPE_WIRED_MASK) != 0) {
+			if ((modemask & (IPIPE_PASS_MASK | IPIPE_STICKY_MASK)) != 0) {
+				err = -EINVAL;
+				goto unlock_and_exit;
+			}
+			modemask |= (IPIPE_HANDLE_MASK);
+		}
+
+		if ((modemask & IPIPE_STICKY_MASK) != 0)
+			modemask |= IPIPE_HANDLE_MASK;
+	} else
+		modemask &=
+		    ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK |
+		      IPIPE_EXCLUSIVE_MASK | IPIPE_WIRED_MASK);
+
+	if (acknowledge == NULL && !ipipe_virtual_irq_p(irq))
+		/* Acknowledge handler unspecified for a hw interrupt:
+		   use the Linux-defined handler instead. */
+		acknowledge = ipipe_root_domain->irqs[irq].acknowledge;
+
+	ipd->irqs[irq].handler = handler;
+	ipd->irqs[irq].cookie = cookie;
+	ipd->irqs[irq].acknowledge = acknowledge;
+	ipd->irqs[irq].control = modemask;
+
+	if (irq < NR_IRQS && handler != NULL && !ipipe_virtual_irq_p(irq)) {
+		__ipipe_enable_irqdesc(ipd, irq);
+
+		if ((modemask & IPIPE_ENABLE_MASK) != 0) {
+			if (ipd != ipipe_current_domain) {
+				/* IRQ enable/disable state is domain-sensitive, so we may
+				   not change it for another domain. What is allowed
+				   however is forcing some domain to handle an interrupt
+				   source, by passing the proper 'ipd' descriptor which
+				   thus may be different from ipipe_current_domain. */
+				err = -EPERM;
+				goto unlock_and_exit;
+			}
+			__ipipe_enable_irq(irq);
+		}
+	}
+
+	err = 0;
+
+      unlock_and_exit:
+
+	spin_unlock_irqrestore(&__ipipe_pipelock, flags);
+
+	return err;
+}
+
+/* ipipe_control_irq() -- Change modes of a pipelined interrupt for
+ * the current domain. */
+
+int ipipe_control_irq(unsigned irq, unsigned clrmask, unsigned setmask)
+{
+	struct ipipe_domain *ipd;
+	unsigned long flags;
+
+	if (irq >= IPIPE_NR_IRQS)
+		return -EINVAL;
+
+	ipd = ipipe_current_domain;
+
+	if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK)
+		return -EPERM;
+
+	if (ipd->irqs[irq].handler == NULL)
+		setmask &= ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK);
+
+	if ((setmask & IPIPE_STICKY_MASK) != 0)
+		setmask |= IPIPE_HANDLE_MASK;
+
+	if ((clrmask & (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK)) != 0)	/* If one goes, both go. */
+		clrmask |= (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK);
+
+	spin_lock_irqsave(&__ipipe_pipelock, flags);
+
+	ipd->irqs[irq].control &= ~clrmask;
+	ipd->irqs[irq].control |= setmask;
+
+	if ((setmask & IPIPE_ENABLE_MASK) != 0)
+		__ipipe_enable_irq(irq);
+	else if ((clrmask & IPIPE_ENABLE_MASK) != 0)
+		__ipipe_disable_irq(irq);
+
+	spin_unlock_irqrestore(&__ipipe_pipelock, flags);
+
+	return 0;
+}
+
+/* __ipipe_dispatch_event() -- Low-level event dispatcher. */
+
+int fastcall __ipipe_dispatch_event (unsigned event, void *data)
+{
+	struct ipipe_domain *start_domain, *this_domain, *next_domain;
+	ipipe_event_handler_t evhand;
+	struct list_head *pos, *npos;
+	unsigned long flags;
+	int propagate = 1;
+
+	local_irq_save_hw(flags);
+
+	start_domain = this_domain = ipipe_current_domain;
+
+	list_for_each_safe(pos, npos, &__ipipe_pipeline) {
+		/*
+		 * Note: Domain migration may occur while running
+		 * event or interrupt handlers, in which case the
+		 * current register set is going to be recycled for a
+		 * different domain than the initiating one. We do
+		 * care for that, always tracking the current domain
+		 * descriptor upon return from those handlers.
+		 */
+		next_domain = list_entry(pos, struct ipipe_domain, p_link);
+
+		/*
+		 * Keep a cached copy of the handler's address since
+		 * ipipe_catch_event() may clear it under our feet.
+		 */
+		evhand = next_domain->evhand[event];
+
+		if (evhand != NULL) {
+			ipipe_current_domain = next_domain;
+			ipipe_cpudom_var(next_domain, evsync) |= (1LL << event);
+			local_irq_restore_hw(flags);
+			propagate = !evhand(event, start_domain, data);
+			local_irq_save_hw(flags);
+			ipipe_cpudom_var(next_domain, evsync) &= ~(1LL << event);
+			if (ipipe_current_domain != next_domain)
+				this_domain = ipipe_current_domain;
+		}
+
+		if (next_domain != ipipe_root_domain &&	/* NEVER sync the root stage here. */
+		    ipipe_cpudom_var(next_domain, irqpend_himask) != 0 &&
+		    !test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(next_domain, status))) {
+			ipipe_current_domain = next_domain;
+			__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+			if (ipipe_current_domain != next_domain)
+				this_domain = ipipe_current_domain;
+		}
+
+		ipipe_current_domain = this_domain;
+
+		if (next_domain == this_domain || !propagate)
+			break;
+	}
+
+	local_irq_restore_hw(flags);
+
+	return !propagate;
+}
+
+/*
+ * __ipipe_dispatch_wired -- Wired interrupt dispatcher. Wired
+ * interrupts are immediately and unconditionally delivered to the
+ * domain heading the pipeline upon receipt, and such domain must have
+ * been registered as an invariant head for the system (priority ==
+ * IPIPE_HEAD_PRIORITY). The motivation for using wired interrupts is
+ * to get an extra-fast dispatching path for those IRQs, by relying on
+ * a straightforward logic based on assumptions that must always be
+ * true for invariant head domains.  The following assumptions are
+ * made when dealing with such interrupts:
+ *
+ * 1- Wired interrupts are purely dynamic, i.e. the decision to
+ * propagate them down the pipeline must be done from the head domain
+ * ISR.
+ * 2- Wired interrupts cannot be shared or sticky.
+ * 3- The root domain cannot be an invariant pipeline head, in
+ * consequence of what the root domain cannot handle wired
+ * interrupts.
+ * 4- Wired interrupts must have a valid acknowledge handler for the
+ * head domain (if needed), and in any case, must not rely on handlers
+ * provided by lower priority domains during the acknowledge cycle
+ * (see __ipipe_handle_irq).
+ *
+ * Called with hw interrupts off.
+ */
+
+int fastcall __ipipe_dispatch_wired(struct ipipe_domain *head_domain, unsigned irq)
+{
+	struct ipipe_domain *old;
+
+	if (test_bit(IPIPE_LOCK_FLAG, &head_domain->irqs[irq].control)) {
+		/* If we can't process this IRQ right now, we must
+		 * mark it as held, so that it will get played during
+		 * normal log sync when the corresponding interrupt
+		 * source is eventually unlocked. */
+		ipipe_cpudom_var(head_domain, irqall)[irq]++;
+		__set_bit(irq & IPIPE_IRQ_IMASK, &ipipe_cpudom_var(head_domain, irqheld_mask)[irq >> IPIPE_IRQ_ISHIFT]);
+		return 0;
+	}
+
+	if (test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status))) {
+		__ipipe_set_irq_pending(head_domain, irq);
+		return 0;
+	}
+
+	old = ipipe_current_domain;
+	ipipe_current_domain = head_domain; /* Switch to the head domain. */
+
+	ipipe_cpudom_var(head_domain, irqall)[irq]++;
+	__set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status));
+	head_domain->irqs[irq].handler(irq, head_domain->irqs[irq].cookie); /* Call the ISR. */
+	__ipipe_run_irqtail();
+	__clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status));
+
+	/* We expect the caller to start a complete pipeline walk upon
+	 * return, so that propagated interrupts will get played. */
+
+	if (ipipe_current_domain == head_domain)
+		ipipe_current_domain = old; /* Back to the preempted domain. */
+
+	return 1;
+}
+
+/*
+ * __ipipe_sync_stage() -- Flush the pending IRQs for the current
+ * domain (and processor). This routine flushes the interrupt log
+ * (see "Optimistic interrupt protection" from D. Stodolsky et al. for
+ * more on the deferred interrupt scheme). Every interrupt that
+ * occurred while the pipeline was stalled gets played. WARNING:
+ * callers on SMP boxen should always check for CPU migration on
+ * return of this routine. One can control the kind of interrupts
+ * which are going to be sync'ed using the syncmask
+ * parameter. IPIPE_IRQMASK_ANY plays them all, IPIPE_IRQMASK_VIRT
+ * plays virtual interrupts only.
+ *
+ * This routine must be called with hw interrupts off.
+ */
+void fastcall __ipipe_sync_stage(unsigned long syncmask)
+{
+	unsigned long mask, submask;
+	struct ipipe_domain *ipd;
+	int level, rank, cpu;
+	unsigned irq;
+
+	if (__test_and_set_bit(IPIPE_SYNC_FLAG, &ipipe_this_cpudom_var(status)))
+		return;
+
+	ipd = ipipe_current_domain;
+	cpu = ipipe_processor_id();
+
+	/*
+	 * The policy here is to keep the dispatching code interrupt-free
+	 * by stalling the current stage. If the upper domain handler
+	 * (which we call) wants to re-enable interrupts while in a safe
+	 * portion of the code (e.g. SA_INTERRUPT flag unset for Linux's
+	 * sigaction()), it will have to unstall (then stall again before
+	 * returning to us!) the stage when it sees fit.
+	 */
+	while ((mask = (ipipe_this_cpudom_var(irqpend_himask) & syncmask)) != 0) {
+		level = __ipipe_ffnz(mask);
+
+		while ((submask = ipipe_this_cpudom_var(irqpend_lomask)[level]) != 0) {
+			rank = __ipipe_ffnz(submask);
+			irq = (level << IPIPE_IRQ_ISHIFT) + rank;
+
+			if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) {
+				__clear_bit(rank, &ipipe_this_cpudom_var(irqpend_lomask)[level]);
+				continue;
+			}
+
+			__clear_bit(rank, &ipipe_this_cpudom_var(irqpend_lomask)[level]);
+
+			if (ipipe_this_cpudom_var(irqpend_lomask)[level] == 0)
+				__clear_bit(level, &ipipe_this_cpudom_var(irqpend_himask));
+
+			__set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+
+			if (ipd == ipipe_root_domain)
+				trace_hardirqs_off();
+
+			__ipipe_run_isr(ipd, irq);
+#ifdef CONFIG_SMP
+			{
+				int newcpu = ipipe_processor_id();
+
+				if (newcpu != cpu) {	/* Handle CPU migration. */
+					/*
+					 * We expect any domain to clear the SYNC bit each
+					 * time it switches in a new task, so that preemptions
+					 * and/or CPU migrations (in the SMP case) over the
+					 * ISR do not lock out the log syncer for some
+					 * indefinite amount of time. In the Linux case,
+					 * schedule() handles this (see kernel/sched.c). For
+					 * this reason, we don't bother clearing it here for
+					 * the source CPU in the migration handling case,
+					 * since it must have scheduled another task in by
+					 * now.
+					 */
+					__set_bit(IPIPE_SYNC_FLAG, &ipipe_this_cpudom_var(status));
+					cpu = newcpu;
+				}
+			}
+#endif	/* CONFIG_SMP */
+			if (ipd == ipipe_root_domain &&
+			    test_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)))
+				trace_hardirqs_on();
+
+			__clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+		}
+	}
+
+	__clear_bit(IPIPE_SYNC_FLAG, &ipipe_this_cpudom_var(status));
+}
+
+/* ipipe_register_domain() -- Link a new domain to the pipeline. */
+
+int ipipe_register_domain(struct ipipe_domain *ipd,
+			  struct ipipe_domain_attr *attr)
+{
+	struct ipipe_domain *_ipd;
+	struct list_head *pos;
+	unsigned long flags;
+
+	if (!ipipe_root_domain_p) {
+		printk(KERN_WARNING
+		       "I-pipe: Only the root domain may register a new domain.\n");
+		return -EPERM;
+	}
+
+	if (attr->priority == IPIPE_HEAD_PRIORITY &&
+	    test_bit(IPIPE_AHEAD_FLAG,&__ipipe_pipeline_head()->flags))
+		return -EAGAIN;	/* Cannot override current head. */
+
+	flags = ipipe_critical_enter(NULL);
+
+	pos = NULL;
+	ipd->slot = ffz(__ipipe_domain_slot_map);
+
+	if (ipd->slot < CONFIG_IPIPE_DOMAINS) {
+		set_bit(ipd->slot, &__ipipe_domain_slot_map);
+		list_for_each(pos, &__ipipe_pipeline) {
+			_ipd = list_entry(pos, struct ipipe_domain, p_link);
+			if (_ipd->domid == attr->domid)
+				break;
+		}
+	}
+
+	ipipe_critical_exit(flags);
+
+	if (pos != &__ipipe_pipeline) {
+		if (ipd->slot < CONFIG_IPIPE_DOMAINS)
+			clear_bit(ipd->slot, &__ipipe_domain_slot_map);
+		return -EBUSY;
+	}
+
+#ifndef CONFIG_SMP
+	/*
+	 * Set up the perdomain pointers for direct access to the
+	 * percpu domain data. This saves a costly multiply each time
+	 * we need to refer to the contents of the percpu domain data
+	 * array.
+	 */
+	__raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = &__raw_get_cpu_var(ipipe_percpu_darray)[ipd->slot];
+#endif
+
+	ipd->name = attr->name;
+	ipd->domid = attr->domid;
+	ipd->pdd = attr->pdd;
+	ipd->flags = 0;
+
+	if (attr->priority == IPIPE_HEAD_PRIORITY) {
+		ipd->priority = INT_MAX;
+		__set_bit(IPIPE_AHEAD_FLAG,&ipd->flags);
+	}
+	else
+		ipd->priority = attr->priority;
+
+	__ipipe_init_stage(ipd);
+
+	INIT_LIST_HEAD(&ipd->p_link);
+
+#ifdef CONFIG_PROC_FS
+	__ipipe_add_domain_proc(ipd);
+#endif /* CONFIG_PROC_FS */
+
+	flags = ipipe_critical_enter(NULL);
+
+	list_for_each(pos, &__ipipe_pipeline) {
+		_ipd = list_entry(pos, struct ipipe_domain, p_link);
+		if (ipd->priority > _ipd->priority)
+			break;
+	}
+
+	list_add_tail(&ipd->p_link, pos);
+
+	ipipe_critical_exit(flags);
+
+	printk(KERN_INFO "I-pipe: Domain %s registered.\n", ipd->name);
+
+	/*
+	 * Finally, allow the new domain to perform its initialization
+	 * chores.
+	 */
+
+	if (attr->entry != NULL) {
+		ipipe_current_domain = ipd;
+		attr->entry();
+		ipipe_current_domain = ipipe_root_domain;
+
+		local_irq_save_hw(flags);
+
+		if (ipipe_root_cpudom_var(irqpend_himask) != 0 &&
+		    !test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
+			__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+
+		local_irq_restore_hw(flags);
+	}
+
+	return 0;
+}
+
+/* ipipe_unregister_domain() -- Remove a domain from the pipeline. */
+
+int ipipe_unregister_domain(struct ipipe_domain *ipd)
+{
+	unsigned long flags;
+
+	if (!ipipe_root_domain_p) {
+		printk(KERN_WARNING
+		       "I-pipe: Only the root domain may unregister a domain.\n");
+		return -EPERM;
+	}
+
+	if (ipd == ipipe_root_domain) {
+		printk(KERN_WARNING
+		       "I-pipe: Cannot unregister the root domain.\n");
+		return -EPERM;
+	}
+#ifdef CONFIG_SMP
+	{
+		unsigned irq;
+		int cpu;
+
+		/*
+		 * In the SMP case, wait for the logged events to drain on
+		 * other processors before eventually removing the domain
+		 * from the pipeline.
+		 */
+
+		ipipe_unstall_pipeline_from(ipd);
+
+		flags = ipipe_critical_enter(NULL);
+
+		for (irq = 0; irq < IPIPE_NR_IRQS; irq++) {
+			clear_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control);
+			clear_bit(IPIPE_STICKY_FLAG, &ipd->irqs[irq].control);
+			set_bit(IPIPE_PASS_FLAG, &ipd->irqs[irq].control);
+		}
+
+		ipipe_critical_exit(flags);
+
+		for_each_online_cpu(cpu) {
+			while (ipipe_percpudom(ipd, irqpend_himask, cpu) > 0)
+				cpu_relax();
+		}
+	}
+#endif	/* CONFIG_SMP */
+
+	mutex_lock(&ipd->mutex);
+
+#ifdef CONFIG_PROC_FS
+	__ipipe_remove_domain_proc(ipd);
+#endif /* CONFIG_PROC_FS */
+
+	/*
+	 * Simply remove the domain from the pipeline and we are almost done.
+	 */
+
+	flags = ipipe_critical_enter(NULL);
+	list_del_init(&ipd->p_link);
+	ipipe_critical_exit(flags);
+
+	__ipipe_cleanup_domain(ipd);
+
+	mutex_unlock(&ipd->mutex);
+
+	printk(KERN_INFO "I-pipe: Domain %s unregistered.\n", ipd->name);
+
+	return 0;
+}
+
+/*
+ * ipipe_propagate_irq() -- Force a given IRQ propagation on behalf of
+ * a running interrupt handler to the next domain down the pipeline.
+ * ipipe_schedule_irq() -- Does almost the same as above, but attempts
+ * to pend the interrupt for the current domain first.
+ */
+int fastcall __ipipe_schedule_irq(unsigned irq, struct list_head *head)
+{
+	struct ipipe_domain *ipd;
+	struct list_head *ln;
+	unsigned long flags;
+
+	if (irq >= IPIPE_NR_IRQS ||
+	    (ipipe_virtual_irq_p(irq)
+	     && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)))
+		return -EINVAL;
+
+	local_irq_save_hw(flags);
+
+	ln = head;
+
+	while (ln != &__ipipe_pipeline) {
+
+		ipd = list_entry(ln, struct ipipe_domain, p_link);
+
+		if (test_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control)) {
+			__ipipe_set_irq_pending(ipd, irq);
+			local_irq_restore_hw(flags);
+			return 1;
+		}
+
+		ln = ipd->p_link.next;
+	}
+
+	local_irq_restore_hw(flags);
+
+	return 0;
+}
+
+/* ipipe_free_virq() -- Release a virtual/soft interrupt. */
+
+int ipipe_free_virq(unsigned virq)
+{
+	if (!ipipe_virtual_irq_p(virq))
+		return -EINVAL;
+
+	clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map);
+
+	return 0;
+}
+
+void ipipe_init_attr(struct ipipe_domain_attr *attr)
+{
+	attr->name = "anon";
+	attr->domid = 1;
+	attr->entry = NULL;
+	attr->priority = IPIPE_ROOT_PRIO;
+	attr->pdd = NULL;
+}
+
+/*
+ * ipipe_catch_event() -- Interpose or remove an event handler for a
+ * given domain.
+ */
+ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd,
+					unsigned event,
+					ipipe_event_handler_t handler)
+{
+	ipipe_event_handler_t old_handler;
+	unsigned long flags;
+	int self = 0, cpu;
+
+	if (event & IPIPE_EVENT_SELF) {
+		event &= ~IPIPE_EVENT_SELF;
+		self = 1;
+	}
+
+	if (event >= IPIPE_NR_EVENTS)
+		return NULL;
+
+	flags = ipipe_critical_enter(NULL);
+
+	if (!(old_handler = xchg(&ipd->evhand[event],handler)))	{
+		if (handler) {
+			if (self)
+				ipd->evself |= (1LL << event);
+			else
+				__ipipe_event_monitors[event]++;
+		}
+	}
+	else if (!handler) {
+		if (ipd->evself & (1LL << event))
+			ipd->evself &= ~(1LL << event);
+		else
+			__ipipe_event_monitors[event]--;
+	} else if ((ipd->evself & (1LL << event)) && !self) {
+			__ipipe_event_monitors[event]++;
+			ipd->evself &= ~(1LL << event);
+	} else if (!(ipd->evself & (1LL << event)) && self) {
+			__ipipe_event_monitors[event]--;
+			ipd->evself |= (1LL << event);
+	}
+
+	ipipe_critical_exit(flags);
+
+	if (!handler && ipipe_root_domain_p) {
+		/*
+		 * If we cleared a handler on behalf of the root
+		 * domain, we have to wait for any current invocation
+		 * to drain, since our caller might subsequently unmap
+		 * the target domain. To this aim, this code
+		 * synchronizes with __ipipe_dispatch_event(),
+		 * guaranteeing that either the dispatcher sees a null
+		 * handler in which case it discards the invocation
+		 * (which also prevents from entering a livelock), or
+		 * finds a valid handler and calls it. Symmetrically,
+		 * ipipe_catch_event() ensures that the called code
+		 * won't be unmapped under our feet until the event
+		 * synchronization flag is cleared for the given event
+		 * on all CPUs.
+		 */
+
+		for_each_online_cpu(cpu) {
+			while (ipipe_percpudom(ipd, evsync, cpu) & (1LL << event))
+				schedule_timeout_interruptible(HZ / 50);
+		}
+	}
+
+	return old_handler;
+}
+
+cpumask_t ipipe_set_irq_affinity (unsigned irq, cpumask_t cpumask)
+{
+#ifdef CONFIG_SMP
+	if (irq >= IPIPE_NR_XIRQS)
+		/* Allow changing affinity of external IRQs only. */
+		return CPU_MASK_NONE;
+
+	if (num_online_cpus() > 1)
+		return __ipipe_set_irq_affinity(irq,cpumask);
+#endif /* CONFIG_SMP */
+
+	return CPU_MASK_NONE;
+}
+
+int fastcall ipipe_send_ipi (unsigned ipi, cpumask_t cpumask)
+
+{
+#ifdef CONFIG_SMP
+	return __ipipe_send_ipi(ipi,cpumask);
+#else /* !CONFIG_SMP */
+	return -EINVAL;
+#endif /* CONFIG_SMP */
+}
+
+int ipipe_alloc_ptdkey (void)
+{
+	unsigned long flags;
+	int key = -1;
+
+	spin_lock_irqsave(&__ipipe_pipelock,flags);
+
+	if (__ipipe_ptd_key_count < IPIPE_ROOT_NPTDKEYS) {
+		key = ffz(__ipipe_ptd_key_map);
+		set_bit(key,&__ipipe_ptd_key_map);
+		__ipipe_ptd_key_count++;
+	}
+
+	spin_unlock_irqrestore(&__ipipe_pipelock,flags);
+
+	return key;
+}
+
+int ipipe_free_ptdkey (int key)
+{
+	unsigned long flags;
+
+	if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
+		return -EINVAL;
+
+	spin_lock_irqsave(&__ipipe_pipelock,flags);
+
+	if (test_and_clear_bit(key,&__ipipe_ptd_key_map))
+		__ipipe_ptd_key_count--;
+
+	spin_unlock_irqrestore(&__ipipe_pipelock,flags);
+
+	return 0;
+}
+
+int fastcall ipipe_set_ptd (int key, void *value)
+
+{
+	if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
+		return -EINVAL;
+
+	current->ptd[key] = value;
+
+	return 0;
+}
+
+void fastcall *ipipe_get_ptd (int key)
+
+{
+	if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
+		return NULL;
+
+	return current->ptd[key];
+}
+
+#ifdef CONFIG_PROC_FS
+
+struct proc_dir_entry *ipipe_proc_root;
+
+static int __ipipe_version_info_proc(char *page,
+				     char **start,
+				     off_t off, int count, int *eof, void *data)
+{
+	int len = sprintf(page, "%s\n", IPIPE_VERSION_STRING);
+
+	len -= off;
+
+	if (len <= off + count)
+		*eof = 1;
+
+	*start = page + off;
+
+	if(len > count)
+		len = count;
+
+	if(len < 0)
+		len = 0;
+
+	return len;
+}
+
+static int __ipipe_common_info_show(struct seq_file *p, void *data)
+{
+	struct ipipe_domain *ipd = (struct ipipe_domain *)p->private;
+	char handling, stickiness, lockbit, exclusive, virtuality;
+
+	unsigned long ctlbits;
+	unsigned irq;
+
+	seq_printf(p, "       +----- Handling ([A]ccepted, [G]rabbed, [W]ired, [D]iscarded)\n");
+	seq_printf(p, "       |+---- Sticky\n");
+	seq_printf(p, "       ||+--- Locked\n");
+	seq_printf(p, "       |||+-- Exclusive\n");
+	seq_printf(p, "       ||||+- Virtual\n");
+	seq_printf(p, "[IRQ]  |||||\n");
+
+	mutex_lock(&ipd->mutex);
+
+	for (irq = 0; irq < IPIPE_NR_IRQS; irq++) {
+		/* Remember to protect against
+		 * ipipe_virtual_irq/ipipe_control_irq if more fields
+		 * get involved. */
+		ctlbits = ipd->irqs[irq].control;
+
+		if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq))
+			/*
+			 * There might be a hole between the last external
+			 * IRQ and the first virtual one; skip it.
+			 */
+			continue;
+
+		if (ipipe_virtual_irq_p(irq)
+		    && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))
+			/* Non-allocated virtual IRQ; skip it. */
+			continue;
+
+		/*
+		 * Statuses are as follows:
+		 * o "accepted" means handled _and_ passed down the pipeline.
+		 * o "grabbed" means handled, but the interrupt might be
+		 * terminated _or_ passed down the pipeline depending on
+		 * what the domain handler asks for to the I-pipe.
+		 * o "wired" is basically the same as "grabbed", except that
+		 * the interrupt is unconditionally delivered to an invariant
+		 * pipeline head domain.
+		 * o "passed" means unhandled by the domain but passed
+		 * down the pipeline.
+		 * o "discarded" means unhandled and _not_ passed down the
+		 * pipeline. The interrupt merely disappears from the
+		 * current domain down to the end of the pipeline.
+		 */
+		if (ctlbits & IPIPE_HANDLE_MASK) {
+			if (ctlbits & IPIPE_PASS_MASK)
+				handling = 'A';
+			else if (ctlbits & IPIPE_WIRED_MASK)
+				handling = 'W';
+			else
+				handling = 'G';
+		} else if (ctlbits & IPIPE_PASS_MASK)
+			/* Do not output if no major action is taken. */
+			continue;
+		else
+			handling = 'D';
+
+		if (ctlbits & IPIPE_STICKY_MASK)
+			stickiness = 'S';
+		else
+			stickiness = '.';
+
+		if (ctlbits & IPIPE_LOCK_MASK)
+			lockbit = 'L';
+		else
+			lockbit = '.';
+
+		if (ctlbits & IPIPE_EXCLUSIVE_MASK)
+			exclusive = 'X';
+		else
+			exclusive = '.';
+
+		if (ipipe_virtual_irq_p(irq))
+			virtuality = 'V';
+		else
+			virtuality = '.';
+
+		seq_printf(p, " %3u:  %c%c%c%c%c\n",
+			     irq, handling, stickiness, lockbit, exclusive, virtuality);
+	}
+
+	seq_printf(p, "[Domain info]\n");
+
+	seq_printf(p, "id=0x%.8x\n", ipd->domid);
+
+	if (test_bit(IPIPE_AHEAD_FLAG,&ipd->flags))
+		seq_printf(p, "priority=topmost\n");
+	else
+		seq_printf(p, "priority=%d\n", ipd->priority);
+
+	mutex_unlock(&ipd->mutex);
+
+	return 0;
+}
+
+static int __ipipe_common_info_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __ipipe_common_info_show, PROC_I(inode)->pde->data);
+}
+
+static struct file_operations __ipipe_info_proc_ops = {
+	.owner		= THIS_MODULE,
+	.open		= __ipipe_common_info_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+void __ipipe_add_domain_proc(struct ipipe_domain *ipd)
+{
+	struct proc_dir_entry *e = create_proc_entry(ipd->name, 0444, ipipe_proc_root);
+	if (e) {
+		e->proc_fops = &__ipipe_info_proc_ops;
+		e->data = (void*) ipd;
+	}
+}
+
+void __ipipe_remove_domain_proc(struct ipipe_domain *ipd)
+{
+	remove_proc_entry(ipd->name,ipipe_proc_root);
+}
+
+void __init ipipe_init_proc(void)
+{
+	ipipe_proc_root = create_proc_entry("ipipe",S_IFDIR, 0);
+	create_proc_read_entry("version",0444,ipipe_proc_root,&__ipipe_version_info_proc,NULL);
+	__ipipe_add_domain_proc(ipipe_root_domain);
+
+	__ipipe_init_tracer();
+}
+
+#endif	/* CONFIG_PROC_FS */
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+
+DEFINE_PER_CPU(int, ipipe_percpu_context_check) = { 1 };
+
+void ipipe_check_context(struct ipipe_domain *border_ipd)
+{
+	/* Note: We don't make the per_cpu access atomic. We assume that code
+	   which temporarily disables the check does this in atomic context
+	   only. */
+	if (likely(ipipe_current_domain->priority <= border_ipd->priority) ||
+	    !per_cpu(ipipe_percpu_context_check, ipipe_processor_id()))
+		return;
+
+	ipipe_context_check_off();
+
+	ipipe_trace_panic_freeze();
+	ipipe_set_printk_sync(ipipe_current_domain);
+	printk(KERN_ERR "I-pipe: Detected illicit call from domain '%s'\n"
+	       KERN_ERR "        into a service reserved for domain '%s' and "
+			"below.\n",
+	       ipipe_current_domain->name, border_ipd->name);
+	show_stack(NULL, NULL);
+	ipipe_trace_panic_dump();
+}
+
+EXPORT_SYMBOL(ipipe_check_context);
+#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */
+
+EXPORT_SYMBOL(ipipe_virtualize_irq);
+EXPORT_SYMBOL(ipipe_control_irq);
+EXPORT_SYMBOL(ipipe_suspend_domain);
+EXPORT_SYMBOL(ipipe_alloc_virq);
+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_domain);
+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_darray);
+EXPORT_SYMBOL(ipipe_root);
+EXPORT_SYMBOL(ipipe_stall_pipeline_from);
+EXPORT_SYMBOL(ipipe_test_and_stall_pipeline_from);
+EXPORT_SYMBOL(ipipe_unstall_pipeline_from);
+EXPORT_SYMBOL(ipipe_restore_pipeline_from);
+EXPORT_SYMBOL(ipipe_test_and_unstall_pipeline_from);
+EXPORT_SYMBOL(ipipe_unstall_pipeline_head);
+EXPORT_SYMBOL(__ipipe_restore_pipeline_head);
+EXPORT_SYMBOL(__ipipe_unstall_root);
+EXPORT_SYMBOL(__ipipe_restore_root);
+EXPORT_SYMBOL(__ipipe_spin_lock_irq);
+EXPORT_SYMBOL(__ipipe_spin_unlock_irq);
+EXPORT_SYMBOL(__ipipe_spin_lock_irqsave);
+EXPORT_SYMBOL(__ipipe_spin_unlock_irqrestore);
+EXPORT_SYMBOL(__ipipe_pipeline);
+EXPORT_SYMBOL(__ipipe_lock_irq);
+EXPORT_SYMBOL(__ipipe_unlock_irq);
+EXPORT_SYMBOL(ipipe_register_domain);
+EXPORT_SYMBOL(ipipe_unregister_domain);
+EXPORT_SYMBOL(ipipe_free_virq);
+EXPORT_SYMBOL(ipipe_init_attr);
+EXPORT_SYMBOL(ipipe_catch_event);
+EXPORT_SYMBOL(ipipe_alloc_ptdkey);
+EXPORT_SYMBOL(ipipe_free_ptdkey);
+EXPORT_SYMBOL(ipipe_set_ptd);
+EXPORT_SYMBOL(ipipe_get_ptd);
+EXPORT_SYMBOL(ipipe_set_irq_affinity);
+EXPORT_SYMBOL(ipipe_send_ipi);
+EXPORT_SYMBOL(__ipipe_schedule_irq);
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+EXPORT_SYMBOL(ipipe_request_tickdev);
+EXPORT_SYMBOL(ipipe_release_tickdev);
+#endif
Index: linux-2.6.23/kernel/ipipe/tracer.c
===================================================================
--- /dev/null
+++ linux-2.6.23/kernel/ipipe/tracer.c
@@ -0,0 +1,1342 @@
+/* -*- linux-c -*-
+ * kernel/ipipe/tracer.c
+ *
+ * Copyright (C) 2005 Luotao Fu.
+ *               2005-2007 Jan Kiszka.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/ctype.h>
+#include <linux/vmalloc.h>
+#include <linux/utsrelease.h>
+#include <linux/sched.h>
+#include <linux/ipipe.h>
+#include <asm/uaccess.h>
+
+#define IPIPE_TRACE_PATHS           4 /* <!> Do not lower below 3 */
+#define IPIPE_DEFAULT_ACTIVE        0
+#define IPIPE_DEFAULT_MAX           1
+#define IPIPE_DEFAULT_FROZEN        2
+
+#define IPIPE_TRACE_POINTS          (1 << CONFIG_IPIPE_TRACE_SHIFT)
+#define WRAP_POINT_NO(point)        ((point) & (IPIPE_TRACE_POINTS-1))
+
+#define IPIPE_DEFAULT_PRE_TRACE     10
+#define IPIPE_DEFAULT_POST_TRACE    10
+#define IPIPE_DEFAULT_BACK_TRACE    100
+
+#define IPIPE_DELAY_NOTE            1000  /* in nanoseconds */
+#define IPIPE_DELAY_WARN            10000 /* in nanoseconds */
+
+#define IPIPE_TFLG_NMI_LOCK         0x0001
+#define IPIPE_TFLG_NMI_HIT          0x0002
+#define IPIPE_TFLG_NMI_FREEZE_REQ   0x0004
+
+#define IPIPE_TFLG_HWIRQ_OFF        0x0100
+#define IPIPE_TFLG_FREEZING         0x0200
+#define IPIPE_TFLG_CURRDOM_SHIFT    10   /* bits 10..11: current domain */
+#define IPIPE_TFLG_CURRDOM_MASK     0x0C00
+#define IPIPE_TFLG_DOMSTATE_SHIFT   12   /* bits 12..15: domain stalled? */
+#define IPIPE_TFLG_DOMSTATE_BITS    3
+
+#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \
+	(point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT)))
+#define IPIPE_TFLG_CURRENT_DOMAIN(point) \
+	((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT)
+
+
+struct ipipe_trace_point{
+	short type;
+	short flags;
+	unsigned long eip;
+	unsigned long parent_eip;
+	unsigned long v;
+	unsigned long long timestamp;
+};
+
+struct ipipe_trace_path{
+	volatile int flags;
+	int dump_lock; /* separated from flags due to cross-cpu access */
+	int trace_pos; /* next point to fill */
+	int begin, end; /* finalised path begin and end */
+	int post_trace; /* non-zero when in post-trace phase */
+	unsigned long long length; /* max path length in cycles */
+	unsigned long nmi_saved_eip; /* for deferred requests from NMIs */
+	unsigned long nmi_saved_parent_eip;
+	unsigned long nmi_saved_v;
+	struct ipipe_trace_point point[IPIPE_TRACE_POINTS];
+} ____cacheline_aligned_in_smp;
+
+enum ipipe_trace_type
+{
+	IPIPE_TRACE_FUNC = 0,
+	IPIPE_TRACE_BEGIN,
+	IPIPE_TRACE_END,
+	IPIPE_TRACE_FREEZE,
+	IPIPE_TRACE_SPECIAL,
+	IPIPE_TRACE_PID,
+};
+
+#define IPIPE_TYPE_MASK             0x0007
+#define IPIPE_TYPE_BITS             3
+
+
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
+
+static struct ipipe_trace_path *trace_paths[NR_CPUS];
+
+#else /* !CONFIG_IPIPE_TRACE_VMALLOC */
+
+static struct ipipe_trace_path trace_paths[NR_CPUS][IPIPE_TRACE_PATHS] =
+	{ [0 ... NR_CPUS-1] =
+		{ [0 ... IPIPE_TRACE_PATHS-1] =
+			{ .begin = -1, .end = -1 }
+		}
+	};
+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
+
+int ipipe_trace_enable = 0;
+
+static int active_path[NR_CPUS] =
+	{ [0 ... NR_CPUS-1] = IPIPE_DEFAULT_ACTIVE };
+static int max_path[NR_CPUS] =
+	{ [0 ... NR_CPUS-1] = IPIPE_DEFAULT_MAX };
+static int frozen_path[NR_CPUS] =
+	{ [0 ... NR_CPUS-1] = IPIPE_DEFAULT_FROZEN };
+static IPIPE_DEFINE_SPINLOCK(global_path_lock);
+static int pre_trace = IPIPE_DEFAULT_PRE_TRACE;
+static int post_trace = IPIPE_DEFAULT_POST_TRACE;
+static int back_trace = IPIPE_DEFAULT_BACK_TRACE;
+static int verbose_trace = 1;
+static unsigned long trace_overhead;
+
+static unsigned long trigger_begin;
+static unsigned long trigger_end;
+
+static DEFINE_MUTEX(out_mutex);
+static struct ipipe_trace_path *print_path;
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+static struct ipipe_trace_path *panic_path;
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
+static int print_pre_trace;
+static int print_post_trace;
+
+
+static long __ipipe_signed_tsc2us(long long tsc);
+static void
+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point);
+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip);
+
+
+static notrace void
+__ipipe_store_domain_states(struct ipipe_trace_point *point)
+{
+	struct ipipe_domain *ipd;
+	struct list_head *pos;
+	int i = 0;
+
+	list_for_each_prev(pos, &__ipipe_pipeline) {
+		ipd = list_entry(pos, struct ipipe_domain, p_link);
+
+		if (test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)))
+			point->flags |= 1 << (i + IPIPE_TFLG_DOMSTATE_SHIFT);
+
+		if (ipd == ipipe_current_domain)
+			point->flags |= i << IPIPE_TFLG_CURRDOM_SHIFT;
+
+		if (++i > IPIPE_TFLG_DOMSTATE_BITS)
+			break;
+	}
+}
+
+static notrace int __ipipe_get_free_trace_path(int old, int cpu_id)
+{
+	int new_active = old;
+	struct ipipe_trace_path *tp;
+
+	do {
+		if (++new_active == IPIPE_TRACE_PATHS)
+			new_active = 0;
+		tp = &trace_paths[cpu_id][new_active];
+	} while ((new_active == max_path[cpu_id]) ||
+	         (new_active == frozen_path[cpu_id]) ||
+	         tp->dump_lock);
+
+	return new_active;
+}
+
+static notrace void
+__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp,
+                          struct ipipe_trace_path *old_tp, int old_pos)
+{
+	int i;
+
+	new_tp->trace_pos = pre_trace+1;
+
+	for (i = new_tp->trace_pos; i > 0; i--)
+		memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)],
+		       &old_tp->point[WRAP_POINT_NO(old_pos-i)],
+		       sizeof(struct ipipe_trace_point));
+
+	/* mark the end (i.e. the point before point[0]) invalid */
+	new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0;
+}
+
+static notrace struct ipipe_trace_path *
+__ipipe_trace_end(int cpu_id, struct ipipe_trace_path *tp, int pos)
+{
+	struct ipipe_trace_path *old_tp = tp;
+	long active = active_path[cpu_id];
+	unsigned long long length;
+
+	/* do we have a new worst case? */
+	length = tp->point[tp->end].timestamp -
+	         tp->point[tp->begin].timestamp;
+	if (length > (trace_paths[cpu_id][max_path[cpu_id]]).length) {
+		/* we need protection here against other cpus trying
+		   to start a proc dump */
+		spin_lock(&global_path_lock);
+
+		/* active path holds new worst case */
+		tp->length = length;
+		max_path[cpu_id] = active;
+
+		/* find next unused trace path */
+		active = __ipipe_get_free_trace_path(active, cpu_id);
+
+		spin_unlock(&global_path_lock);
+
+		tp = &trace_paths[cpu_id][active];
+
+		/* migrate last entries for pre-tracing */
+		__ipipe_migrate_pre_trace(tp, old_tp, pos);
+	}
+
+	return tp;
+}
+
+static notrace struct ipipe_trace_path *
+__ipipe_trace_freeze(int cpu_id, struct ipipe_trace_path *tp, int pos)
+{
+	struct ipipe_trace_path *old_tp = tp;
+	long active = active_path[cpu_id];
+	int i;
+
+	/* frozen paths have no core (begin=end) */
+	tp->begin = tp->end;
+
+	/* we need protection here against other cpus trying
+	 * to set their frozen path or to start a proc dump */
+	spin_lock(&global_path_lock);
+
+	frozen_path[cpu_id] = active;
+
+	/* find next unused trace path */
+	active = __ipipe_get_free_trace_path(active, cpu_id);
+
+	/* check if this is the first frozen path */
+	for (i = 0; i < NR_CPUS; i++) {
+		if ((i != cpu_id) &&
+		    (trace_paths[i][frozen_path[i]].end >= 0))
+			tp->end = -1;
+	}
+
+	spin_unlock(&global_path_lock);
+
+	tp = &trace_paths[cpu_id][active];
+
+	/* migrate last entries for pre-tracing */
+	__ipipe_migrate_pre_trace(tp, old_tp, pos);
+
+	return tp;
+}
+
+void notrace
+__ipipe_trace(enum ipipe_trace_type type, unsigned long eip,
+              unsigned long parent_eip, unsigned long v)
+{
+	struct ipipe_trace_path *tp, *old_tp;
+	int pos, next_pos, begin;
+	struct ipipe_trace_point *point;
+	unsigned long flags;
+	int cpu_id;
+
+	local_irq_save_hw_notrace(flags);
+
+	cpu_id = ipipe_processor_id();
+ restart:
+	tp = old_tp = &trace_paths[cpu_id][active_path[cpu_id]];
+
+	/* here starts a race window with NMIs - catched below */
+
+	/* check for NMI recursion */
+	if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) {
+		tp->flags |= IPIPE_TFLG_NMI_HIT;
+
+		/* first freeze request from NMI context? */
+		if ((type == IPIPE_TRACE_FREEZE) &&
+		    !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) {
+			/* save arguments and mark deferred freezing */
+			tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ;
+			tp->nmi_saved_eip = eip;
+			tp->nmi_saved_parent_eip = parent_eip;
+			tp->nmi_saved_v = v;
+		}
+		return; /* no need for restoring flags inside IRQ */
+	}
+
+	/* clear NMI events and set lock (atomically per cpu) */
+	tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT |
+	                           IPIPE_TFLG_NMI_FREEZE_REQ))
+	                       | IPIPE_TFLG_NMI_LOCK;
+
+	/* check active_path again - some nasty NMI may have switched
+	 * it meanwhile */
+	if (unlikely(tp != &trace_paths[cpu_id][active_path[cpu_id]])) {
+		/* release lock on wrong path and restart */
+		tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+		/* there is no chance that the NMI got deferred
+		 * => no need to check for pending freeze requests */
+		goto restart;
+	}
+
+	/* get the point buffer */
+	pos = tp->trace_pos;
+	point = &tp->point[pos];
+
+	/* store all trace point data */
+	point->type = type;
+	point->flags = raw_irqs_disabled_flags(flags) ? IPIPE_TFLG_HWIRQ_OFF : 0;
+	point->eip = eip;
+	point->parent_eip = parent_eip;
+	point->v = v;
+	ipipe_read_tsc(point->timestamp);
+
+	__ipipe_store_domain_states(point);
+
+	/* forward to next point buffer */
+	next_pos = WRAP_POINT_NO(pos+1);
+	tp->trace_pos = next_pos;
+
+	/* only mark beginning if we haven't started yet */
+	begin = tp->begin;
+	if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0))
+		tp->begin = pos;
+
+	/* end of critical path, start post-trace if not already started */
+	if (unlikely(type == IPIPE_TRACE_END) &&
+	    (begin >= 0) && !tp->post_trace)
+		tp->post_trace = post_trace + 1;
+
+	/* freeze only if the slot is free and we are not already freezing */
+	if ((unlikely(type == IPIPE_TRACE_FREEZE) ||
+	     (unlikely(eip >= trigger_begin && eip <= trigger_end) &&
+	     type == IPIPE_TRACE_FUNC)) &&
+	    (trace_paths[cpu_id][frozen_path[cpu_id]].begin < 0) &&
+	    !(tp->flags & IPIPE_TFLG_FREEZING)) {
+		tp->post_trace = post_trace + 1;
+		tp->flags |= IPIPE_TFLG_FREEZING;
+	}
+
+	/* enforce end of trace in case of overflow */
+	if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) {
+		tp->end = pos;
+		goto enforce_end;
+	}
+
+	/* stop tracing this path if we are in post-trace and
+	 *  a) that phase is over now or
+	 *  b) a new TRACE_BEGIN came in but we are not freezing this path */
+	if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) ||
+	             ((type == IPIPE_TRACE_BEGIN) &&
+	              !(tp->flags & IPIPE_TFLG_FREEZING))))) {
+		/* store the path's end (i.e. excluding post-trace) */
+		tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace);
+
+ enforce_end:
+		if (tp->flags & IPIPE_TFLG_FREEZING)
+			tp = __ipipe_trace_freeze(cpu_id, tp, pos);
+		else
+			tp = __ipipe_trace_end(cpu_id, tp, pos);
+
+		/* reset the active path, maybe already start a new one */
+		tp->begin = (type == IPIPE_TRACE_BEGIN) ?
+			WRAP_POINT_NO(tp->trace_pos - 1) : -1;
+		tp->end = -1;
+		tp->post_trace = 0;
+		tp->flags = 0;
+
+		/* update active_path not earlier to avoid races with NMIs */
+		active_path[cpu_id] = tp - trace_paths[cpu_id];
+	}
+
+	/* we still have old_tp and point,
+	 * let's reset NMI lock and check for catches */
+	old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+	if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) {
+		/* well, this late tagging may not immediately be visible for
+		 * other cpus already dumping this path - a minor issue */
+		point->flags |= IPIPE_TFLG_NMI_HIT;
+
+		/* handle deferred freezing from NMI context */
+		if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
+			__ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip,
+			              old_tp->nmi_saved_parent_eip,
+			              old_tp->nmi_saved_v);
+	}
+
+	local_irq_restore_hw_notrace(flags);
+}
+
+static unsigned long __ipipe_global_path_lock(void)
+{
+	unsigned long flags;
+	int cpu_id;
+	struct ipipe_trace_path *tp;
+
+	spin_lock_irqsave(&global_path_lock, flags);
+
+	cpu_id = ipipe_processor_id();
+ restart:
+	tp = &trace_paths[cpu_id][active_path[cpu_id]];
+
+	/* here is small race window with NMIs - catched below */
+
+	/* clear NMI events and set lock (atomically per cpu) */
+	tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT |
+	                           IPIPE_TFLG_NMI_FREEZE_REQ))
+	                       | IPIPE_TFLG_NMI_LOCK;
+
+	/* check active_path again - some nasty NMI may have switched
+	 * it meanwhile */
+	if (tp != &trace_paths[cpu_id][active_path[cpu_id]]) {
+		/* release lock on wrong path and restart */
+		tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+		/* there is no chance that the NMI got deferred
+		 * => no need to check for pending freeze requests */
+		goto restart;
+	}
+
+	return flags;
+}
+
+static void __ipipe_global_path_unlock(unsigned long flags)
+{
+	int cpu_id;
+	struct ipipe_trace_path *tp;
+
+	/* release spinlock first - it's not involved in the NMI issue */
+	__ipipe_spin_unlock_irqbegin(&global_path_lock);
+
+	cpu_id = ipipe_processor_id();
+	tp = &trace_paths[cpu_id][active_path[cpu_id]];
+
+	tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+	/* handle deferred freezing from NMI context */
+	if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
+		__ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip,
+		              tp->nmi_saved_parent_eip, tp->nmi_saved_v);
+
+	/* See __ipipe_spin_lock_irqsave() and friends. */
+	__ipipe_spin_unlock_irqcomplete(flags);
+}
+
+void notrace ipipe_trace_begin(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_BEGIN, __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_begin);
+
+void notrace ipipe_trace_end(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_END, __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_end);
+
+void notrace ipipe_trace_freeze(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_FREEZE, __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_freeze);
+
+void notrace ipipe_trace_special(unsigned char id, unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS),
+	              __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_special);
+
+void notrace ipipe_trace_pid(pid_t pid, short prio)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS),
+	              __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, pid);
+}
+EXPORT_SYMBOL(ipipe_trace_pid);
+
+int ipipe_trace_max_reset(void)
+{
+	int cpu_id;
+	unsigned long flags;
+	struct ipipe_trace_path *path;
+	int ret = 0;
+
+	flags = __ipipe_global_path_lock();
+
+	for (cpu_id = 0; cpu_id < NR_CPUS; cpu_id++) {
+		path = &trace_paths[cpu_id][max_path[cpu_id]];
+
+		if (path->dump_lock) {
+			ret = -EBUSY;
+			break;
+		}
+
+		path->begin     = -1;
+		path->end       = -1;
+		path->trace_pos = 0;
+		path->length    = 0;
+	}
+
+	__ipipe_global_path_unlock(flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(ipipe_trace_max_reset);
+
+int ipipe_trace_frozen_reset(void)
+{
+	int cpu_id;
+	unsigned long flags;
+	struct ipipe_trace_path *path;
+	int ret = 0;
+
+	flags = __ipipe_global_path_lock();
+
+	for_each_online_cpu(cpu_id) {
+		path = &trace_paths[cpu_id][frozen_path[cpu_id]];
+
+		if (path->dump_lock) {
+			ret = -EBUSY;
+			break;
+		}
+
+		path->begin = -1;
+		path->end = -1;
+		path->trace_pos = 0;
+		path->length    = 0;
+	}
+
+	__ipipe_global_path_unlock(flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(ipipe_trace_frozen_reset);
+
+static void
+__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point,
+                      int trylock)
+{
+	struct task_struct *task = NULL;
+	char buf[8];
+	int i;
+	int locked = 1;
+
+	if (trylock) {
+		if (!read_trylock(&tasklist_lock))
+			locked = 0;
+	} else
+		read_lock(&tasklist_lock);
+
+	if (locked)
+		task = find_task_by_pid((pid_t)point->v);
+
+	if (task)
+		strncpy(task_info, task->comm, 11);
+	else
+		strcpy(task_info, "-<?>-");
+
+	if (locked)
+		read_unlock(&tasklist_lock);
+
+	for (i = strlen(task_info); i < 11; i++)
+		task_info[i] = ' ';
+
+	sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS);
+	strcpy(task_info + (11 - strlen(buf)), buf);
+}
+
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+void ipipe_trace_panic_freeze(void)
+{
+	unsigned long flags;
+	int cpu_id;
+
+	if (!ipipe_trace_enable)
+		return;
+
+	ipipe_trace_enable = 0;
+	local_irq_save_hw_notrace(flags);
+
+	cpu_id = ipipe_processor_id();
+
+	panic_path = &trace_paths[cpu_id][active_path[cpu_id]];
+
+	local_irq_restore_hw(flags);
+}
+EXPORT_SYMBOL(ipipe_trace_panic_freeze);
+
+void ipipe_trace_panic_dump(void)
+{
+	int cnt = back_trace;
+	int start, pos;
+	char task_info[12];
+
+	if (!panic_path)
+		return;
+
+	ipipe_context_check_off();
+
+	printk("I-pipe tracer log (%d points):\n", cnt);
+
+	start = pos = WRAP_POINT_NO(panic_path->trace_pos-1);
+
+	while (cnt-- > 0) {
+		struct ipipe_trace_point *point = &panic_path->point[pos];
+		long time;
+		char buf[16];
+		int i;
+
+		printk(" %c",
+		       (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' ');
+
+		for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--)
+			printk("%c",
+			       (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ?
+				(IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
+					'#' : '+') :
+				(IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
+					'*' : ' '));
+
+		if (!point->eip)
+			printk("-<invalid>-\n");
+		else {
+			__ipipe_trace_point_type(buf, point);
+			printk(buf);
+
+			switch (point->type & IPIPE_TYPE_MASK) {
+				case IPIPE_TRACE_FUNC:
+					printk("           ");
+					break;
+
+				case IPIPE_TRACE_PID:
+					__ipipe_get_task_info(task_info,
+							      point, 1);
+					printk(task_info);
+					break;
+
+				default:
+					printk("0x%08lx ", point->v);
+			}
+
+			time = __ipipe_signed_tsc2us(point->timestamp -
+				panic_path->point[start].timestamp);
+			printk(" %5ld ", time);
+
+			__ipipe_print_symname(NULL, point->eip);
+			printk(" (");
+			__ipipe_print_symname(NULL, point->parent_eip);
+			printk(")\n");
+		}
+		pos = WRAP_POINT_NO(pos - 1);
+	}
+
+	panic_path = NULL;
+}
+EXPORT_SYMBOL(ipipe_trace_panic_dump);
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
+
+
+/* --- /proc output --- */
+
+static notrace int __ipipe_in_critical_trpath(long point_no)
+{
+	return ((WRAP_POINT_NO(point_no-print_path->begin) <
+	         WRAP_POINT_NO(print_path->end-print_path->begin)) ||
+	        ((print_path->end == print_path->begin) &&
+	         (WRAP_POINT_NO(point_no-print_path->end) >
+	          print_post_trace)));
+}
+
+static long __ipipe_signed_tsc2us(long long tsc)
+{
+        unsigned long long abs_tsc;
+        long us;
+
+	/* ipipe_tsc2us works on unsigned => handle sign separately */
+        abs_tsc = (tsc >= 0) ? tsc : -tsc;
+        us = ipipe_tsc2us(abs_tsc);
+        if (tsc < 0)
+                return -us;
+        else
+                return us;
+}
+
+static void
+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point)
+{
+	switch (point->type & IPIPE_TYPE_MASK) {
+		case IPIPE_TRACE_FUNC:
+			strcpy(buf, "func    ");
+			break;
+
+		case IPIPE_TRACE_BEGIN:
+			strcpy(buf, "begin   ");
+			break;
+
+		case IPIPE_TRACE_END:
+			strcpy(buf, "end     ");
+			break;
+
+		case IPIPE_TRACE_FREEZE:
+			strcpy(buf, "freeze  ");
+			break;
+
+		case IPIPE_TRACE_SPECIAL:
+			sprintf(buf, "(0x%02x)  ",
+				point->type >> IPIPE_TYPE_BITS);
+			break;
+
+		case IPIPE_TRACE_PID:
+			sprintf(buf, "[%5d] ", (pid_t)point->v);
+			break;
+	}
+}
+
+static void
+__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point)
+{
+	char mark = ' ';
+	int point_no = point - print_path->point;
+	int i;
+
+	if (print_path->end == point_no)
+		mark = '<';
+	else if (print_path->begin == point_no)
+		mark = '>';
+	else if (__ipipe_in_critical_trpath(point_no))
+		mark = ':';
+	seq_printf(m, "%c%c", mark,
+	           (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' ');
+
+	if (!verbose_trace)
+		return;
+
+	for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--)
+		seq_printf(m, "%c",
+			(IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ?
+			    (IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
+				'#' : '+') :
+			(IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' '));
+}
+
+static void
+__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point)
+{
+	unsigned long delay = 0;
+	int next;
+	char *mark = "  ";
+
+	next = WRAP_POINT_NO(point+1 - print_path->point);
+
+	if (next != print_path->trace_pos)
+		delay = ipipe_tsc2ns(print_path->point[next].timestamp -
+		                     point->timestamp);
+
+	if (__ipipe_in_critical_trpath(point - print_path->point)) {
+		if (delay > IPIPE_DELAY_WARN)
+			mark = "! ";
+		else if (delay > IPIPE_DELAY_NOTE)
+			mark = "+ ";
+	}
+	seq_puts(m, mark);
+
+	if (verbose_trace)
+		seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000,
+		           (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' ');
+	else
+		seq_puts(m, " ");
+}
+
+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip)
+{
+	char namebuf[KSYM_NAME_LEN+1];
+	unsigned long size, offset;
+	const char *sym_name;
+	char *modname;
+
+	sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf);
+
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+	if (!m) {
+		/* panic dump */
+		if (sym_name) {
+			printk("%s+0x%lx", sym_name, offset);
+			if (modname)
+				printk(" [%s]", modname);
+		}
+	} else
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
+	{
+		if (sym_name) {
+			if (verbose_trace) {
+				seq_printf(m, "%s+0x%lx", sym_name, offset);
+				if (modname)
+					seq_printf(m, " [%s]", modname);
+			} else
+				seq_puts(m, sym_name);
+		} else
+			seq_printf(m, "<%08lx>", eip);
+	}
+}
+
+static void __ipipe_print_headline(struct seq_file *m)
+{
+	seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu "
+		   "us\n\n", trace_overhead/1000, trace_overhead%1000);
+
+	if (verbose_trace) {
+		const char *name[4] = { [0 ... 3] = "<unused>" };
+		struct list_head *pos;
+		int i = 0;
+
+		list_for_each_prev(pos, &__ipipe_pipeline) {
+			struct ipipe_domain *ipd =
+				list_entry(pos, struct ipipe_domain, p_link);
+
+			name[i] = ipd->name;
+			if (++i > 3)
+				break;
+		}
+
+		seq_printf(m,
+		           " +----- Hard IRQs ('|': locked)\n"
+		           " |+---- %s\n"
+		           " ||+--- %s\n"
+		           " |||+-- %s\n"
+		           " ||||+- %s%s\n"
+		           " |||||                        +---------- "
+		               "Delay flag ('+': > %d us, '!': > %d us)\n"
+		           " |||||                        |        +- "
+		               "NMI noise ('N')\n"
+		           " |||||                        |        |\n"
+		           "      Type    User Val.   Time    Delay  Function "
+		               "(Parent)\n",
+		           name[3], name[2], name[1], name[0],
+		           name[0] ? " ('*': domain stalled, '+': current, "
+		               "'#': current+stalled)" : "",
+		           IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000);
+	} else
+		seq_printf(m,
+		           " +--------------- Hard IRQs ('|': locked)\n"
+		           " |             +- Delay flag "
+		               "('+': > %d us, '!': > %d us)\n"
+		           " |             |\n"
+		           "  Type     Time   Function (Parent)\n",
+		           IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000);
+}
+
+static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t n = *pos;
+
+	mutex_lock(&out_mutex);
+
+	if (!n) {
+		struct ipipe_trace_path *path;
+		unsigned long length_usecs;
+		int points, cpu;
+		unsigned long flags;
+
+		/* protect against max_path/frozen_path updates while we
+		 * haven't locked our target path, also avoid recursively
+		 * taking global_path_lock from NMI context */
+		flags = __ipipe_global_path_lock();
+
+		/* find the longest of all per-cpu paths */
+		print_path = NULL;
+		for_each_online_cpu(cpu) {
+			path = &trace_paths[cpu][max_path[cpu]];
+			if ((print_path == NULL) ||
+			    (path->length > print_path->length)) {
+				print_path = path;
+				break;
+			}
+		}
+		print_path->dump_lock = 1;
+
+		__ipipe_global_path_unlock(flags);
+
+		/* does this path actually contain data? */
+		if (print_path->end == print_path->begin)
+			return NULL;
+
+		/* number of points inside the critical path */
+		points = WRAP_POINT_NO(print_path->end-print_path->begin+1);
+
+		/* pre- and post-tracing length, post-trace length was frozen
+		   in __ipipe_trace, pre-trace may have to be reduced due to
+		   buffer overrun */
+		print_pre_trace  = pre_trace;
+		print_post_trace = WRAP_POINT_NO(print_path->trace_pos -
+		                                 print_path->end - 1);
+		if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1)
+			print_pre_trace = IPIPE_TRACE_POINTS - 1 - points -
+				print_post_trace;
+
+		length_usecs = ipipe_tsc2us(print_path->length);
+		seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe-%s\n"
+			"------------------------------------------------------------\n",
+			UTS_RELEASE, IPIPE_ARCH_STRING);
+		seq_printf(m, "CPU: %d, Begin: %lld cycles, Trace Points: "
+			"%d (-%d/+%d), Length: %lu us\n",
+			cpu, print_path->point[print_path->begin].timestamp,
+			points, print_pre_trace, print_post_trace, length_usecs);
+		__ipipe_print_headline(m);
+	}
+
+	/* check if we are inside the trace range */
+	if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 +
+	                       print_pre_trace + print_post_trace))
+		return NULL;
+
+	/* return the next point to be shown */
+	return &print_path->point[WRAP_POINT_NO(print_path->begin -
+	                                        print_pre_trace + n)];
+}
+
+static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	loff_t n = ++*pos;
+
+	/* check if we are inside the trace range with the next entry */
+	if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 +
+	                       print_pre_trace + print_post_trace))
+		return NULL;
+
+	/* return the next point to be shown */
+	return &print_path->point[WRAP_POINT_NO(print_path->begin -
+	                                        print_pre_trace + *pos)];
+}
+
+static void __ipipe_prtrace_stop(struct seq_file *m, void *p)
+{
+	if (print_path)
+		print_path->dump_lock = 0;
+	mutex_unlock(&out_mutex);
+}
+
+static int __ipipe_prtrace_show(struct seq_file *m, void *p)
+{
+	long time;
+	struct ipipe_trace_point *point = p;
+	char buf[16];
+
+	if (!point->eip) {
+		seq_puts(m, "-<invalid>-\n");
+		return 0;
+	}
+
+	__ipipe_print_pathmark(m, point);
+	__ipipe_trace_point_type(buf, point);
+	seq_puts(m, buf);
+	if (verbose_trace)
+		switch (point->type & IPIPE_TYPE_MASK) {
+			case IPIPE_TRACE_FUNC:
+				seq_puts(m, "           ");
+				break;
+
+			case IPIPE_TRACE_PID:
+				__ipipe_get_task_info(buf, point, 0);
+				seq_puts(m, buf);
+				break;
+
+			default:
+				seq_printf(m, "0x%08lx ", point->v);
+		}
+
+	time = __ipipe_signed_tsc2us(point->timestamp -
+		print_path->point[print_path->begin].timestamp);
+	seq_printf(m, "%5ld", time);
+
+	__ipipe_print_delay(m, point);
+	__ipipe_print_symname(m, point->eip);
+	seq_puts(m, " (");
+	__ipipe_print_symname(m, point->parent_eip);
+	seq_puts(m, ")\n");
+
+	return 0;
+}
+
+static struct seq_operations __ipipe_max_ptrace_ops = {
+	.start = __ipipe_max_prtrace_start,
+	.next  = __ipipe_prtrace_next,
+	.stop  = __ipipe_prtrace_stop,
+	.show  = __ipipe_prtrace_show
+};
+
+static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &__ipipe_max_ptrace_ops);
+}
+
+static ssize_t
+__ipipe_max_reset(struct file *file, const char __user *pbuffer,
+                  size_t count, loff_t *data)
+{
+	mutex_lock(&out_mutex);
+	ipipe_trace_max_reset();
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+struct file_operations __ipipe_max_prtrace_fops = {
+	.open       = __ipipe_max_prtrace_open,
+	.read       = seq_read,
+	.write      = __ipipe_max_reset,
+	.llseek     = seq_lseek,
+	.release    = seq_release,
+};
+
+static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t n = *pos;
+
+	mutex_lock(&out_mutex);
+
+	if (!n) {
+		struct ipipe_trace_path *path;
+		int cpu;
+		unsigned long flags;
+
+		/* protect against max_path/frozen_path updates while we
+		 * haven't locked our target path, also avoid recursively
+		 * taking global_path_lock from NMI context */
+		flags = __ipipe_global_path_lock();
+
+		/* find the first of all per-cpu frozen paths */
+		print_path = NULL;
+		for_each_online_cpu(cpu) {
+			path = &trace_paths[cpu][frozen_path[cpu]];
+			if (path->end >= 0) {
+				print_path = path;
+				break;
+			}
+		}
+		if (print_path)
+			print_path->dump_lock = 1;
+
+		__ipipe_global_path_unlock(flags);
+
+		if (!print_path)
+			return NULL;
+
+		/* back- and post-tracing length, post-trace length was frozen
+		   in __ipipe_trace, back-trace may have to be reduced due to
+		   buffer overrun */
+		print_pre_trace  = back_trace-1; /* substract freeze point */
+		print_post_trace = WRAP_POINT_NO(print_path->trace_pos -
+		                                 print_path->end - 1);
+		if (1+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1)
+			print_pre_trace = IPIPE_TRACE_POINTS - 2 -
+				print_post_trace;
+
+		seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe-%s\n"
+			"------------------------------------------------------"
+			"------\n",
+			UTS_RELEASE, IPIPE_ARCH_STRING);
+		seq_printf(m, "CPU: %d, Freeze: %lld cycles, Trace Points: %d (+%d)\n",
+			cpu, print_path->point[print_path->begin].timestamp,
+			print_pre_trace+1, print_post_trace);
+		__ipipe_print_headline(m);
+	}
+
+	/* check if we are inside the trace range */
+	if (n >= print_pre_trace + 1 + print_post_trace)
+		return NULL;
+
+	/* return the next point to be shown */
+	return &print_path->point[WRAP_POINT_NO(print_path->begin-
+	                                        print_pre_trace+n)];
+}
+
+static struct seq_operations __ipipe_frozen_ptrace_ops = {
+	.start = __ipipe_frozen_prtrace_start,
+	.next  = __ipipe_prtrace_next,
+	.stop  = __ipipe_prtrace_stop,
+	.show  = __ipipe_prtrace_show
+};
+
+static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &__ipipe_frozen_ptrace_ops);
+}
+
+static ssize_t
+__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer,
+                    size_t count, loff_t *data)
+{
+	char *end, buf[16];
+	int val;
+	int n;
+
+	n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
+
+	if (copy_from_user(buf, pbuffer, n))
+		return -EFAULT;
+
+	buf[n] = '\0';
+	val = simple_strtol(buf, &end, 0);
+
+	if (((*end != '\0') && !isspace(*end)) || (val < 0))
+		return -EINVAL;
+
+	mutex_lock(&out_mutex);
+	ipipe_trace_frozen_reset();
+	if (val > 0)
+		ipipe_trace_freeze(-1);
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+struct file_operations __ipipe_frozen_prtrace_fops = {
+	.open       = __ipipe_frozen_prtrace_open,
+	.read       = seq_read,
+	.write      = __ipipe_frozen_ctrl,
+	.llseek     = seq_lseek,
+	.release    = seq_release,
+};
+
+static int __ipipe_rd_proc_val(char *page, char **start, off_t off,
+                               int count, int *eof, void *data)
+{
+	int len;
+
+	len = sprintf(page, "%u\n", *(int *)data);
+	len -= off;
+	if (len <= off + count)
+		*eof = 1;
+	*start = page + off;
+	if (len > count)
+		len = count;
+	if (len < 0)
+		len = 0;
+
+	return len;
+}
+
+static int __ipipe_wr_proc_val(struct file *file, const char __user *buffer,
+                               unsigned long count, void *data)
+{
+	char *end, buf[16];
+	int val;
+	int n;
+
+	n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
+
+	if (copy_from_user(buf, buffer, n))
+		return -EFAULT;
+
+	buf[n] = '\0';
+	val = simple_strtol(buf, &end, 0);
+
+	if (((*end != '\0') && !isspace(*end)) || (val < 0))
+		return -EINVAL;
+
+	mutex_lock(&out_mutex);
+	*(int *)data = val;
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+static int __ipipe_rd_trigger(char *page, char **start, off_t off, int count,
+			      int *eof, void *data)
+{
+	int len;
+
+	if (!trigger_begin)
+		return 0;
+
+	len = sprint_symbol(page, trigger_begin);
+	page[len++] = '\n';
+
+	len -= off;
+	if (len <= off + count)
+		*eof = 1;
+	*start = page + off;
+	if (len > count)
+		len = count;
+	if (len < 0)
+		len = 0;
+
+	return len;
+}
+
+static int __ipipe_wr_trigger(struct file *file, const char __user *buffer,
+			      unsigned long count, void *data)
+{
+	char buf[KSYM_SYMBOL_LEN];
+	unsigned long begin, end;
+
+	if (count > sizeof(buf) - 1)
+		count = sizeof(buf) - 1;
+	if (copy_from_user(buf, buffer, count) < 0)
+		return -EFAULT;
+	buf[count] = 0;
+	if (buf[count-1] == '\n')
+		buf[count-1] = 0;
+
+	begin = kallsyms_lookup_name(buf);
+	if (!begin || !kallsyms_lookup_size_offset(begin, &end, NULL))
+		return -ENOENT;
+	end += begin - 1;
+
+	mutex_lock(&out_mutex);
+	/* invalidate the current range before setting a new one */
+	trigger_end = 0;
+	wmb();
+	/* set new range */
+	trigger_begin = begin;
+	wmb();
+	trigger_end = end;
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+extern struct proc_dir_entry *ipipe_proc_root;
+
+static void __init
+__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir,
+                              const char *name, int *value_ptr)
+{
+	struct proc_dir_entry *entry;
+
+	entry = create_proc_entry(name, 0644, trace_dir);
+	if (entry) {
+		entry->data = value_ptr;
+		entry->read_proc = __ipipe_rd_proc_val;
+		entry->write_proc = __ipipe_wr_proc_val;
+		entry->owner = THIS_MODULE;
+	}
+}
+
+void __init __ipipe_init_tracer(void)
+{
+	struct proc_dir_entry *trace_dir;
+	struct proc_dir_entry *entry;
+	unsigned long long start, end, min = ULLONG_MAX;
+	int i;
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
+	int cpu, path;
+
+	for_each_possible_cpu(cpu) {
+		trace_paths[cpu] = vmalloc(
+			sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS);
+		if (trace_paths[cpu] == NULL) {
+			printk(KERN_ERR "I-pipe: "
+			       "insufficient memory for trace buffer.\n");
+			return;
+		}
+		memset(trace_paths[cpu], 0,
+			sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS);
+		for (path = 0; path < IPIPE_TRACE_PATHS; path++) {
+			trace_paths[cpu][path].begin = -1;
+			trace_paths[cpu][path].end   = -1;
+		}
+	}
+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
+	ipipe_trace_enable = CONFIG_IPIPE_TRACE_ENABLE_VALUE;
+
+	/* Calculate minimum overhead of __ipipe_trace() */
+	local_irq_disable_hw();
+	for (i = 0; i < 100; i++) {
+		ipipe_read_tsc(start);
+		__ipipe_trace(IPIPE_TRACE_FUNC, __BUILTIN_RETURN_ADDRESS0,
+			      __BUILTIN_RETURN_ADDRESS1, 0);
+		ipipe_read_tsc(end);
+
+		end -= start;
+		if (end < min)
+			min = end;
+	}
+	local_irq_enable_hw();
+	trace_overhead = ipipe_tsc2ns(min);
+
+	trace_dir = create_proc_entry("trace", S_IFDIR, ipipe_proc_root);
+
+	entry = create_proc_entry("max", 0644, trace_dir);
+	if (entry)
+		entry->proc_fops = &__ipipe_max_prtrace_fops;
+
+	entry = create_proc_entry("frozen", 0644, trace_dir);
+	if (entry)
+		entry->proc_fops = &__ipipe_frozen_prtrace_fops;
+
+	entry = create_proc_entry("trigger", 0644, trace_dir);
+	if (entry) {
+		entry->read_proc = __ipipe_rd_trigger;
+		entry->write_proc = __ipipe_wr_trigger;
+		entry->owner = THIS_MODULE;
+	}
+
+	__ipipe_create_trace_proc_val(trace_dir, "pre_trace_points",
+	                              &pre_trace);
+	__ipipe_create_trace_proc_val(trace_dir, "post_trace_points",
+	                              &post_trace);
+	__ipipe_create_trace_proc_val(trace_dir, "back_trace_points",
+	                              &back_trace);
+	__ipipe_create_trace_proc_val(trace_dir, "verbose",
+	                              &verbose_trace);
+	__ipipe_create_trace_proc_val(trace_dir, "enable",
+	                              &ipipe_trace_enable);
+}
Index: linux-2.6.23/kernel/irq/chip.c
===================================================================
--- linux-2.6.23.orig/kernel/irq/chip.c
+++ linux-2.6.23/kernel/irq/chip.c
@@ -340,7 +340,9 @@ handle_level_irq(unsigned int irq, struc
 	irqreturn_t action_ret;
 
 	spin_lock(&desc->lock);
+#ifndef CONFIG_IPIPE
 	mask_ack_irq(desc, irq);
+#endif /* CONFIG_IPIPE */
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
@@ -417,8 +419,13 @@ handle_fasteoi_irq(unsigned int irq, str
 
 	spin_lock(&desc->lock);
 	desc->status &= ~IRQ_INPROGRESS;
+#ifdef CONFIG_IPIPE
+	desc->chip->unmask(irq);
+out:
+#else
 out:
 	desc->chip->eoi(irq);
+#endif
 
 	spin_unlock(&desc->lock);
 }
@@ -462,8 +469,10 @@ handle_edge_irq(unsigned int irq, struct
 
 	kstat_cpu(cpu).irqs[irq]++;
 
+#ifndef CONFIG_IPIPE
 	/* Start handling the irq */
 	desc->chip->ack(irq);
+#endif /* CONFIG_IPIPE */
 
 	/* Mark the IRQ currently in progress.*/
 	desc->status |= IRQ_INPROGRESS;
@@ -503,6 +512,69 @@ out_unlock:
 	spin_unlock(&desc->lock);
 }
 
+#ifdef CONFIG_IPIPE
+
+void fastcall __ipipe_ack_simple_irq(unsigned irq, struct irq_desc *desc)
+{
+}
+
+void fastcall __ipipe_end_simple_irq(unsigned irq, struct irq_desc *desc)
+{
+}
+
+void fastcall __ipipe_ack_level_irq(unsigned irq, struct irq_desc *desc)
+{
+	mask_ack_irq(desc, irq);
+}
+
+void fastcall __ipipe_end_level_irq(unsigned irq, struct irq_desc *desc)
+{
+	if (desc->chip->unmask)
+		desc->chip->unmask(irq);
+}
+
+void fastcall __ipipe_ack_fasteoi_irq(unsigned irq, struct irq_desc *desc)
+{
+	desc->chip->eoi(irq);
+}
+
+void fastcall __ipipe_end_fasteoi_irq(unsigned irq, struct irq_desc *desc)
+{
+	desc->chip->unmask(irq);
+}
+
+void fastcall __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc)
+{
+	desc->chip->ack(irq);
+}
+
+void fastcall __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc)
+{
+}
+
+void fastcall __ipipe_ack_bad_irq(unsigned irq, struct irq_desc *desc)
+{
+	static int done;
+
+	handle_bad_irq(irq, desc);
+
+	if (!done) {
+		printk(KERN_WARNING "%s: unknown flow handler for IRQ %d\n",
+		       __FUNCTION__, irq);
+		done = 1;
+	}
+}
+
+void fastcall __ipipe_noack_irq(unsigned irq, struct irq_desc *desc)
+{
+}
+
+void fastcall __ipipe_noend_irq(unsigned irq, struct irq_desc *desc)
+{
+}
+
+#endif /* CONFIG_IPIPE */
+
 #ifdef CONFIG_SMP
 /**
  *	handle_percpu_IRQ - Per CPU local irq handler
@@ -518,8 +590,10 @@ handle_percpu_irq(unsigned int irq, stru
 
 	kstat_this_cpu.irqs[irq]++;
 
+#ifndef CONFIG_IPIPE
 	if (desc->chip->ack)
 		desc->chip->ack(irq);
+#endif /* CONFIG_IPIPE */
 
 	action_ret = handle_IRQ_event(irq, desc->action);
 	if (!noirqdebug)
@@ -529,6 +603,22 @@ handle_percpu_irq(unsigned int irq, stru
 		desc->chip->eoi(irq);
 }
 
+#ifdef CONFIG_IPIPE
+
+void fastcall __ipipe_ack_percpu_irq(unsigned irq, struct irq_desc *desc)
+{
+	if (desc->chip->ack)
+		desc->chip->ack(irq);
+}
+
+void fastcall __ipipe_end_percpu_irq(unsigned irq, struct irq_desc *desc)
+{
+	if (desc->chip->eoi)
+		desc->chip->eoi(irq);
+}
+
+#endif /* CONFIG_IPIPE */
+
 #endif /* CONFIG_SMP */
 
 void
@@ -548,6 +638,30 @@ __set_irq_handler(unsigned int irq, irq_
 
 	if (!handle)
 		handle = handle_bad_irq;
+#ifdef CONFIG_IPIPE
+	else if (handle == &handle_simple_irq) {
+		desc->ipipe_ack = &__ipipe_ack_simple_irq;
+		desc->ipipe_end = &__ipipe_end_simple_irq;
+	}
+	else if (handle == &handle_level_irq) {
+		desc->ipipe_ack = &__ipipe_ack_level_irq;
+		desc->ipipe_end = &__ipipe_end_level_irq;
+	}
+	else if (handle == &handle_edge_irq) {
+		desc->ipipe_ack = &__ipipe_ack_edge_irq;
+		desc->ipipe_end = &__ipipe_end_edge_irq;
+	}
+	else if (handle == &handle_fasteoi_irq) {
+		desc->ipipe_ack = &__ipipe_ack_fasteoi_irq;
+		desc->ipipe_end = &__ipipe_end_fasteoi_irq;
+	}
+#ifdef CONFIG_SMP
+	else if (handle == &handle_percpu_irq) {
+		desc->ipipe_ack = &__ipipe_ack_percpu_irq;
+		desc->ipipe_end = &__ipipe_end_percpu_irq;
+	}
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_IPIPE */
 	else if (desc->chip == &no_irq_chip) {
 		printk(KERN_WARNING "Trying to install %sinterrupt handler "
 		       "for IRQ%d\n", is_chained ? "chained " : "", irq);
@@ -559,7 +673,17 @@ __set_irq_handler(unsigned int irq, irq_
 		 * dummy_irq_chip for easy transition.
 		 */
 		desc->chip = &dummy_irq_chip;
+#ifdef CONFIG_IPIPE
+		desc->ipipe_ack = &__ipipe_noack_irq;
+		desc->ipipe_end = &__ipipe_noend_irq;
+#endif /* CONFIG_IPIPE */
 	}
+#ifdef CONFIG_IPIPE
+ 	else {
+ 		desc->ipipe_ack = &__ipipe_ack_bad_irq;
+ 		desc->ipipe_end = &__ipipe_noend_irq;
+ 	}
+#endif /* CONFIG_IPIPE */
 
 	spin_lock_irqsave(&desc->lock, flags);
 
@@ -569,6 +693,10 @@ __set_irq_handler(unsigned int irq, irq_
 			mask_ack_irq(desc, irq);
 		desc->status |= IRQ_DISABLED;
 		desc->depth = 1;
+#ifdef CONFIG_IPIPE
+		desc->ipipe_ack = &__ipipe_ack_bad_irq;
+		desc->ipipe_end = &__ipipe_noend_irq;
+#endif /* CONFIG_IPIPE */
 	}
 	desc->handle_irq = handle;
 	desc->name = name;
Index: linux-2.6.23/kernel/power/swsusp.c
===================================================================
--- linux-2.6.23.orig/kernel/power/swsusp.c
+++ linux-2.6.23/kernel/power/swsusp.c
@@ -278,6 +278,7 @@ int swsusp_suspend(void)
 		return error;
 
 	local_irq_disable();
+	local_irq_disable_hw_cond();
 	/* At this point, device_suspend() has been called, but *not*
 	 * device_power_down(). We *must* device_power_down() now.
 	 * Otherwise, drivers for some devices (e.g. interrupt controllers)
@@ -299,6 +300,7 @@ int swsusp_suspend(void)
 	 */
 	device_power_up();
  Enable_irqs:
+	local_irq_enable_hw_cond();
 	local_irq_enable();
 	return error;
 }
@@ -308,6 +310,7 @@ int swsusp_resume(void)
 	int error;
 
 	local_irq_disable();
+	local_irq_disable_hw_cond();
 	/* NOTE:  device_power_down() is just a suspend() with irqs off;
 	 * it has no special "power things down" semantics
 	 */
@@ -334,6 +337,7 @@ int swsusp_resume(void)
 	restore_processor_state();
 	touch_softlockup_watchdog();
 	device_power_up();
+	local_irq_enable_hw_cond();
 	local_irq_enable();
 	return error;
 }
Index: linux-2.6.23/kernel/printk.c
===================================================================
--- linux-2.6.23.orig/kernel/printk.c
+++ linux-2.6.23/kernel/printk.c
@@ -504,6 +504,82 @@ static int have_callable_console(void)
  * printf(3)
  */
 
+#ifdef CONFIG_IPIPE
+
+static ipipe_spinlock_t __ipipe_printk_lock = IPIPE_SPIN_LOCK_UNLOCKED;
+
+static int __ipipe_printk_fill;
+
+static char __ipipe_printk_buf[__LOG_BUF_LEN];
+
+void __ipipe_flush_printk (unsigned virq, void *cookie)
+{
+	char *p = __ipipe_printk_buf;
+	int len, lmax, out = 0;
+	unsigned long flags;
+
+	goto start;
+
+	do {
+		spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
+ start:
+		lmax = __ipipe_printk_fill;
+		while (out < lmax) {
+			len = strlen(p) + 1;
+			printk("%s",p);
+			p += len;
+			out += len;
+		}
+		spin_lock_irqsave(&__ipipe_printk_lock, flags);
+	}
+	while (__ipipe_printk_fill != lmax);
+
+	__ipipe_printk_fill = 0;
+
+	spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
+}
+
+asmlinkage int printk(const char *fmt, ...)
+{
+  	int r, fbytes, oldcount, cs = -1;
+    	unsigned long flags;
+	va_list args;
+
+	va_start(args, fmt);
+
+	if (test_bit(IPIPE_SPRINTK_FLAG,&ipipe_current_domain->flags) ||
+	    oops_in_progress)
+		cs = ipipe_disable_context_check(ipipe_processor_id());
+
+	if (ipipe_current_domain == ipipe_root_domain || cs != -1) {
+		r = vprintk(fmt, args);
+		if (cs != -1)
+			ipipe_restore_context_check(ipipe_processor_id(), cs);
+		goto out;
+	}
+
+	spin_lock_irqsave(&__ipipe_printk_lock, flags);
+
+	oldcount = __ipipe_printk_fill;
+	fbytes = __LOG_BUF_LEN - oldcount;
+
+	if (fbytes > 1)	{
+		r = vscnprintf(__ipipe_printk_buf + __ipipe_printk_fill,
+			       fbytes, fmt, args) + 1; /* account for the null byte */
+		__ipipe_printk_fill += r;
+	} else
+		r = 0;
+
+	spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
+
+	if (oldcount == 0)
+		ipipe_trigger_irq(__ipipe_printk_virq);
+out:
+	va_end(args);
+
+	return r;
+}
+#else /* !CONFIG_IPIPE */
 asmlinkage int printk(const char *fmt, ...)
 {
 	va_list args;
@@ -515,6 +591,7 @@ asmlinkage int printk(const char *fmt, .
 
 	return r;
 }
+#endif /* CONFIG_IPIPE */
 
 /* cpu currently holding logbuf_lock */
 static volatile unsigned int printk_cpu = UINT_MAX;
Index: linux-2.6.23/kernel/sched.c
===================================================================
--- linux-2.6.23.orig/kernel/sched.c
+++ linux-2.6.23/kernel/sched.c
@@ -1436,7 +1436,7 @@ static int try_to_wake_up(struct task_st
 
 	rq = task_rq_lock(p, &flags);
 	old_state = p->state;
-	if (!(old_state & state))
+	if (!(old_state & state) || (old_state & TASK_NOWAKEUP))
 		goto out;
 
 	if (p->se.on_rq)
@@ -1850,13 +1850,15 @@ asmlinkage void schedule_tail(struct tas
 #endif
 	if (current->set_child_tid)
 		put_user(current->pid, current->set_child_tid);
+
+	ipipe_init_notify(current);
 }
 
 /*
  * context_switch - switch to the new MM and the new
  * thread's register state.
  */
-static inline void
+static inline int
 context_switch(struct rq *rq, struct task_struct *prev,
 	       struct task_struct *next)
 {
@@ -1897,12 +1899,17 @@ context_switch(struct rq *rq, struct tas
 	switch_to(prev, next, prev);
 
 	barrier();
+
+	if (task_hijacked(prev))
+		return 1;
 	/*
 	 * this_rq must be evaluated again because prev may have moved
 	 * CPUs since it called schedule(), thus the 'rq' on its stack
 	 * frame will be invalid.
 	 */
 	finish_task_switch(this_rq(), prev);
+
+	return 0;
 }
 
 /*
@@ -3474,6 +3481,8 @@ asmlinkage void __sched schedule(void)
 	struct rq *rq;
 	int cpu;
 
+	ipipe_check_context(ipipe_root_domain);
+
 need_resched:
 	preempt_disable();
 	cpu = smp_processor_id();
@@ -3481,6 +3490,11 @@ need_resched:
 	rcu_qsctr_inc(cpu);
 	prev = rq->curr;
 	switch_count = &prev->nivcsw;
+	if (unlikely(prev->state & TASK_ATOMICSWITCH)) {
+		prev->state &= ~TASK_ATOMICSWITCH;
+		/* Pop one disable level -- one still remains. */
+		preempt_enable();
+	}
 
 	release_kernel_lock(prev);
 need_resched_nonpreemptible:
@@ -3514,7 +3528,8 @@ need_resched_nonpreemptible:
 		rq->curr = next;
 		++*switch_count;
 
-		context_switch(rq, prev, next); /* unlocks the rq */
+		if (context_switch(rq, prev, next)) /* unlocks the rq unless hijacked */
+			return;
 	} else
 		spin_unlock_irq(&rq->lock);
 
@@ -3542,6 +3557,7 @@ asmlinkage void __sched preempt_schedule
 	struct task_struct *task = current;
 	int saved_lock_depth;
 #endif
+	ipipe_check_context(ipipe_root_domain);
 	/*
 	 * If there is a non-zero preempt_count or interrupts are disabled,
 	 * we do not want to preempt the current task.  Just return..
@@ -4265,6 +4281,7 @@ recheck:
 		deactivate_task(rq, p, 0);
 	oldprio = p->prio;
 	__setscheduler(rq, p, policy, param->sched_priority);
+	ipipe_setsched_notify(p);
 	if (on_rq) {
 		activate_task(rq, p, 0);
 		/*
@@ -6739,3 +6756,54 @@ void set_curr_task(int cpu, struct task_
 }
 
 #endif
+
+#ifdef CONFIG_IPIPE
+
+int ipipe_setscheduler_root (struct task_struct *p, int policy, int prio)
+{
+	unsigned long flags;
+	int oldprio, on_rq;
+	struct rq *rq;
+
+	spin_lock_irqsave(&p->pi_lock, flags);
+	rq = __task_rq_lock(p);
+	on_rq = p->se.on_rq;
+	if (on_rq)
+		deactivate_task(rq, p, 0);
+	oldprio = p->prio;
+	__setscheduler(rq, p, policy, prio);
+	ipipe_setsched_notify(p);
+	if (on_rq) {
+		activate_task(rq, p, 0);
+		if (task_running(rq, p)) {
+			if (p->prio > oldprio)
+				resched_task(rq->curr);
+		} else
+			check_preempt_curr(rq, p);
+	}
+	__task_rq_unlock(rq);
+	spin_unlock_irqrestore(&p->pi_lock, flags);
+
+	rt_mutex_adjust_pi(p);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(ipipe_setscheduler_root);
+
+int ipipe_reenter_root (struct task_struct *prev, int policy, int prio)
+{
+	finish_task_switch(this_rq(), prev);
+
+	(void)reacquire_kernel_lock(current);
+	preempt_enable_no_resched();
+
+	if (current->policy != policy || current->rt_priority != prio)
+		return ipipe_setscheduler_root(current, policy, prio);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(ipipe_reenter_root);
+
+#endif /* CONFIG_IPIPE */
Index: linux-2.6.23/kernel/signal.c
===================================================================
--- linux-2.6.23.orig/kernel/signal.c
+++ linux-2.6.23/kernel/signal.c
@@ -455,6 +455,7 @@ void signal_wake_up(struct task_struct *
 	unsigned int mask;
 
 	set_tsk_thread_flag(t, TIF_SIGPENDING);
+	ipipe_sigwake_notify(t); /* TIF_SIGPENDING must be set first. */
 
 	/*
 	 * For SIGKILL, we want to wake it up in the stopped/traced case.
Index: linux-2.6.23/kernel/time/clockevents.c
===================================================================
--- linux-2.6.23.orig/kernel/time/clockevents.c
+++ linux-2.6.23/kernel/time/clockevents.c
@@ -84,6 +84,7 @@ int clockevents_program_event(struct clo
 		return -ETIME;
 
 	dev->next_event = expires;
+	dev->delta = delta;
 
 	if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
 		return 0;
Index: linux-2.6.23/lib/Kconfig.debug
===================================================================
--- linux-2.6.23.orig/lib/Kconfig.debug
+++ linux-2.6.23/lib/Kconfig.debug
@@ -71,6 +71,8 @@ config HEADERS_CHECK
 	  exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in
 	  your build tree), to make sure they're suitable.
 
+source "kernel/ipipe/Kconfig.debug"
+
 config DEBUG_KERNEL
 	bool "Kernel debugging"
 	help
Index: linux-2.6.23/lib/bust_spinlocks.c
===================================================================
--- linux-2.6.23.orig/lib/bust_spinlocks.c
+++ linux-2.6.23/lib/bust_spinlocks.c
@@ -12,16 +12,19 @@
 #include <linux/tty.h>
 #include <linux/wait.h>
 #include <linux/vt_kern.h>
+#include <linux/ipipe_trace.h>
 
 
 void __attribute__((weak)) bust_spinlocks(int yes)
 {
 	if (yes) {
+ 		ipipe_trace_panic_freeze();
 		oops_in_progress = 1;
 	} else {
 #ifdef CONFIG_VT
 		unblank_screen();
 #endif
+		ipipe_trace_panic_dump();
 		oops_in_progress = 0;
 		wake_up_klogd();
 	}
Index: linux-2.6.23/lib/ioremap.c
===================================================================
--- linux-2.6.23.orig/lib/ioremap.c
+++ linux-2.6.23/lib/ioremap.c
@@ -84,8 +84,8 @@ int ioremap_page_range(unsigned long add
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
-
-	flush_cache_vmap(start, end);
+	__ipipe_pin_range_globally(start, end);
+ 	flush_cache_vmap(start, end);
 
 	return err;
 }
Index: linux-2.6.23/lib/smp_processor_id.c
===================================================================
--- linux-2.6.23.orig/lib/smp_processor_id.c
+++ linux-2.6.23/lib/smp_processor_id.c
@@ -13,6 +13,9 @@ unsigned int debug_smp_processor_id(void
 	int this_cpu = raw_smp_processor_id();
 	cpumask_t this_mask;
 
+	if (!ipipe_root_domain_p)
+		goto out;
+
 	if (likely(preempt_count))
 		goto out;
 
Index: linux-2.6.23/lib/spinlock_debug.c
===================================================================
--- linux-2.6.23.orig/lib/spinlock_debug.c
+++ linux-2.6.23/lib/spinlock_debug.c
@@ -133,6 +133,8 @@ void _raw_spin_lock(spinlock_t *lock)
 	debug_spin_lock_after(lock);
 }
 
+EXPORT_SYMBOL(_raw_spin_lock);
+
 int _raw_spin_trylock(spinlock_t *lock)
 {
 	int ret = __raw_spin_trylock(&lock->raw_lock);
@@ -148,12 +150,16 @@ int _raw_spin_trylock(spinlock_t *lock)
 	return ret;
 }
 
+EXPORT_SYMBOL(_raw_spin_trylock);
+
 void _raw_spin_unlock(spinlock_t *lock)
 {
 	debug_spin_unlock(lock);
 	__raw_spin_unlock(&lock->raw_lock);
 }
 
+EXPORT_SYMBOL(_raw_spin_unlock);
+
 static void rwlock_bug(rwlock_t *lock, const char *msg)
 {
 	if (!debug_locks_off())
@@ -199,6 +205,8 @@ void _raw_read_lock(rwlock_t *lock)
 	__raw_read_lock(&lock->raw_lock);
 }
 
+EXPORT_SYMBOL(_raw_read_lock);
+
 int _raw_read_trylock(rwlock_t *lock)
 {
 	int ret = __raw_read_trylock(&lock->raw_lock);
@@ -212,12 +220,16 @@ int _raw_read_trylock(rwlock_t *lock)
 	return ret;
 }
 
+EXPORT_SYMBOL(_raw_read_trylock);
+
 void _raw_read_unlock(rwlock_t *lock)
 {
 	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
 	__raw_read_unlock(&lock->raw_lock);
 }
 
+EXPORT_SYMBOL(_raw_read_unlock);
+
 static inline void debug_write_lock_before(rwlock_t *lock)
 {
 	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
@@ -275,6 +287,8 @@ void _raw_write_lock(rwlock_t *lock)
 	debug_write_lock_after(lock);
 }
 
+EXPORT_SYMBOL(_raw_write_lock);
+
 int _raw_write_trylock(rwlock_t *lock)
 {
 	int ret = __raw_write_trylock(&lock->raw_lock);
@@ -290,8 +304,12 @@ int _raw_write_trylock(rwlock_t *lock)
 	return ret;
 }
 
+EXPORT_SYMBOL(_raw_write_trylock);
+
 void _raw_write_unlock(rwlock_t *lock)
 {
 	debug_write_unlock(lock);
 	__raw_write_unlock(&lock->raw_lock);
 }
+
+EXPORT_SYMBOL(_raw_write_unlock);
Index: linux-2.6.23/mm/memory.c
===================================================================
--- linux-2.6.23.orig/mm/memory.c
+++ linux-2.6.23/mm/memory.c
@@ -50,6 +50,7 @@
 #include <linux/delayacct.h>
 #include <linux/init.h>
 #include <linux/writeback.h>
+#include <linux/vmalloc.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -416,6 +417,34 @@ struct page *vm_normal_page(struct vm_ar
 	return pfn_to_page(pfn);
 }
 
+static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
+{
+	/*
+	 * If the source page was a PFN mapping, we don't have
+	 * a "struct page" for it. We do a best-effort copy by
+	 * just copying from the original user address. If that
+	 * fails, we just zero-fill it. Live with it.
+	 */
+	if (unlikely(!src)) {
+		void *kaddr = kmap_atomic(dst, KM_USER0);
+		void __user *uaddr = (void __user *)(va & PAGE_MASK);
+
+		/*
+		 * This really shouldn't fail, because the page is there
+		 * in the page tables. But it might just be unreadable,
+		 * in which case we just give up and fill the result with
+		 * zeroes.
+		 */
+		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
+			memset(kaddr, 0, PAGE_SIZE);
+		kunmap_atomic(kaddr, KM_USER0);
+		flush_dcache_page(dst);
+		return;
+		
+	}
+	copy_user_highpage(dst, src, va, vma);
+}
+
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
@@ -424,8 +453,8 @@ struct page *vm_normal_page(struct vm_ar
 
 static inline void
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
-		unsigned long addr, int *rss)
+	     pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
+	     unsigned long addr, int *rss, struct page *uncow_page)
 {
 	unsigned long vm_flags = vma->vm_flags;
 	pte_t pte = *src_pte;
@@ -464,6 +493,21 @@ copy_one_pte(struct mm_struct *dst_mm, s
 	 * in the parent and the child
 	 */
 	if (is_cow_mapping(vm_flags)) {
+#ifdef CONFIG_IPIPE
+		if (uncow_page) {
+			struct page *old_page = vm_normal_page(vma, addr, pte);
+			cow_user_page(uncow_page, old_page, addr, vma);
+			pte = mk_pte(uncow_page, vma->vm_page_prot);
+
+			if (vm_flags & VM_SHARED)
+				pte = pte_mkclean(pte);
+			pte = pte_mkold(pte);
+
+			page_dup_rmap(uncow_page, vma, addr);
+			rss[!!PageAnon(uncow_page)]++;
+			goto out_set_pte;
+		}
+#endif /* CONFIG_IPIPE */
 		ptep_set_wrprotect(src_mm, addr, src_pte);
 		pte = pte_wrprotect(pte);
 	}
@@ -494,13 +538,27 @@ static int copy_pte_range(struct mm_stru
 	pte_t *src_pte, *dst_pte;
 	spinlock_t *src_ptl, *dst_ptl;
 	int progress = 0;
+	struct page *uncow_page = NULL;
 	int rss[2];
-
+#ifdef CONFIG_IPIPE
+	int do_cow_break = 0;
+again:
+	if (do_cow_break) {
+		uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+		if (!uncow_page)
+			return -ENOMEM;
+		do_cow_break = 0;
+	}
+#else
 again:
+#endif
 	rss[1] = rss[0] = 0;
 	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
-	if (!dst_pte)
+	if (!dst_pte) {
+		if (uncow_page)
+			page_cache_release(uncow_page);
 		return -ENOMEM;
+	}
 	src_pte = pte_offset_map_nested(src_pmd, addr);
 	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
@@ -522,7 +580,20 @@ again:
 			progress++;
 			continue;
 		}
-		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
+#ifdef CONFIG_IPIPE
+		if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
+			if (is_cow_mapping(vma->vm_flags)) {
+				if (((vma->vm_flags|src_mm->def_flags) & (VM_LOCKED|VM_PINNED))
+				    == (VM_LOCKED|VM_PINNED)) {
+					do_cow_break = 1;
+					break;
+				}
+			}
+		}
+#endif
+		copy_one_pte(dst_mm, src_mm, dst_pte,
+			     src_pte, vma, addr, rss, uncow_page);
+		uncow_page = NULL;
 		progress += 8;
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 
@@ -1586,34 +1657,6 @@ static inline pte_t maybe_mkwrite(pte_t 
 	return pte;
 }
 
-static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
-{
-	/*
-	 * If the source page was a PFN mapping, we don't have
-	 * a "struct page" for it. We do a best-effort copy by
-	 * just copying from the original user address. If that
-	 * fails, we just zero-fill it. Live with it.
-	 */
-	if (unlikely(!src)) {
-		void *kaddr = kmap_atomic(dst, KM_USER0);
-		void __user *uaddr = (void __user *)(va & PAGE_MASK);
-
-		/*
-		 * This really shouldn't fail, because the page is there
-		 * in the page tables. But it might just be unreadable,
-		 * in which case we just give up and fill the result with
-		 * zeroes.
-		 */
-		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
-			memset(kaddr, 0, PAGE_SIZE);
-		kunmap_atomic(kaddr, KM_USER0);
-		flush_dcache_page(dst);
-		return;
-
-	}
-	copy_user_highpage(dst, src, va, vma);
-}
-
 /*
  * This routine handles present pages, when users try to write
  * to a shared page. It is done by copying the page to a new address
@@ -2867,3 +2910,104 @@ int access_process_vm(struct task_struct
 	return buf - old_buf;
 }
 EXPORT_SYMBOL_GPL(access_process_vm);
+
+#ifdef CONFIG_IPIPE
+
+static inline int ipipe_pin_pte_range(struct mm_struct *mm, pmd_t *pmd,
+				      struct vm_area_struct *vma,
+				      unsigned long addr, unsigned long end)
+{
+	spinlock_t *ptl;
+	pte_t *pte;
+
+	do {
+		pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+		if (!pte)
+			continue;
+
+		if (!pte_present(*pte)) {
+			pte_unmap_unlock(pte, ptl);
+			continue;
+		}
+
+		if (do_wp_page(mm, vma, addr, pte, pmd, ptl, *pte) == VM_FAULT_OOM)
+			return -ENOMEM;
+	} while (addr += PAGE_SIZE, addr != end);
+	return 0;
+}
+
+static inline int ipipe_pin_pmd_range(struct mm_struct *mm, pud_t *pud,
+				      struct vm_area_struct *vma,
+				      unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	pmd_t *pmd;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (ipipe_pin_pte_range(mm, pmd, vma, addr, end))
+			return -ENOMEM;
+	} while (pmd++, addr = next, addr != end);
+	return 0;
+}
+
+static inline int ipipe_pin_pud_range(struct mm_struct *mm, pgd_t *pgd,
+				      struct vm_area_struct *vma,
+				      unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	pud_t *pud;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (ipipe_pin_pmd_range(mm, pud, vma, addr, end))
+			return -ENOMEM;
+	} while (pud++, addr = next, addr != end);
+	return 0;
+}
+
+int ipipe_disable_ondemand_mappings(struct task_struct *tsk)
+{
+	unsigned long addr, next, end;
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	int result = 0;
+	pgd_t *pgd;
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return -EPERM;
+
+	down_write(&mm->mmap_sem);
+	if (mm->def_flags & VM_PINNED)
+		goto done_mm;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (!is_cow_mapping(vma->vm_flags))
+			continue;
+
+		addr = vma->vm_start;
+		end = vma->vm_end;
+
+		pgd = pgd_offset(mm, addr);
+		do {
+			next = pgd_addr_end(addr, end);
+			if (ipipe_pin_pud_range(mm, pgd, vma, addr, next)) {
+				result = -ENOMEM;
+				goto done_mm;
+			}
+		} while (pgd++, addr = next, addr != end);
+	}
+	mm->def_flags |= VM_PINNED;
+
+  done_mm:
+	up_write(&mm->mmap_sem);
+	mmput(mm);
+	return result;
+}
+
+EXPORT_SYMBOL(ipipe_disable_ondemand_mappings);
+
+#endif
Index: linux-2.6.23/mm/mlock.c
===================================================================
--- linux-2.6.23.orig/mm/mlock.c
+++ linux-2.6.23/mm/mlock.c
@@ -173,10 +173,10 @@ asmlinkage long sys_munlock(unsigned lon
 static int do_mlockall(int flags)
 {
 	struct vm_area_struct * vma, * prev = NULL;
-	unsigned int def_flags = 0;
+	unsigned int def_flags = current->mm->def_flags & VM_PINNED;
 
 	if (flags & MCL_FUTURE)
-		def_flags = VM_LOCKED;
+		def_flags |= VM_LOCKED;
 	current->mm->def_flags = def_flags;
 	if (flags == MCL_FUTURE)
 		goto out;
Index: linux-2.6.23/mm/vmalloc.c
===================================================================
--- linux-2.6.23.orig/mm/vmalloc.c
+++ linux-2.6.23/mm/vmalloc.c
@@ -161,6 +161,7 @@ int map_vm_area(struct vm_struct *area, 
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
+	__ipipe_pin_range_globally((unsigned long) area->addr, end);
 	flush_cache_vmap((unsigned long) area->addr, end);
 	return err;
 }
Index: linux-2.6.23/kernel/spinlock.c
===================================================================
--- linux-2.6.23.orig/kernel/spinlock.c
+++ linux-2.6.23/kernel/spinlock.c
@@ -88,7 +88,7 @@ unsigned long __lockfunc _spin_lock_irqs
 	 * _raw_spin_lock_flags() code, because lockdep assumes
 	 * that interrupts are not re-enabled during lock-acquire:
 	 */
-#ifdef CONFIG_LOCKDEP
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE)
 	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
 #else
 	_raw_spin_lock_flags(lock, &flags);
@@ -305,7 +305,7 @@ unsigned long __lockfunc _spin_lock_irqs
 	 * _raw_spin_lock_flags() code, because lockdep assumes
 	 * that interrupts are not re-enabled during lock-acquire:
 	 */
-#ifdef CONFIG_LOCKDEP
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE)
 	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
 #else
 	_raw_spin_lock_flags(lock, &flags);
Index: linux-2.6.23/arch/i386/boot/compressed/Makefile
===================================================================
--- linux-2.6.23.orig/arch/i386/boot/compressed/Makefile
+++ linux-2.6.23/arch/i386/boot/compressed/Makefile
@@ -12,6 +12,7 @@ LDFLAGS_vmlinux := -T
 hostprogs-y	:= relocs
 
 CFLAGS  := -m32 -D__KERNEL__ $(LINUX_INCLUDE) -O2 \
+	   -Iinclude/asm-i386/mach-default \
 	   -fno-strict-aliasing -fPIC \
 	   $(call cc-option,-ffreestanding) \
 	   $(call cc-option,-fno-stack-protector)

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 252 bytes --]

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2007-10-13  9:10 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-10-13  9:10 [Xenomai-core] [PREVIEW] ipipe-2.6.23-i386-1.10-07 Jan Kiszka

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.