All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jan Kiszka <jan.kiszka@domain.hid>
To: adeos-main@gna.org
Cc: Xenomai-core@domain.hid
Subject: [Xenomai-core] [PREVIEW] ipipe-2.6.23-i386-1.10-07
Date: Sat, 13 Oct 2007 11:10:20 +0200	[thread overview]
Message-ID: <47108B7C.4080906@domain.hid> (raw)


[-- Attachment #1.1: Type: text/plain, Size: 528 bytes --]

This is a forward port of the latest ipipe patch for 2.6.22 to 2.6.23,
additionally based on Philippe's earlier work for -rc2. It runs
surprisingly well here, so I would like to invite more testers to the
party. You need Xenomai SVN head + my timer setup fix [1] to create a
test platform.

I had to perform one small magic dance to get the patch compiling due to
inclusion hell around ipipe_base.h, check
arch/i386/boot/compressed/Makefile.

Jan

[1] https://mail.gna.org/public/xenomai-core/2007-10/msg00068.html

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1.2: adeos-ipipe-2.6.23-i386-1.10-07.patch --]
[-- Type: text/x-patch; name="adeos-ipipe-2.6.23-i386-1.10-07.patch", Size: 252687 bytes --]

Index: linux-2.6.23/Makefile
===================================================================
--- linux-2.6.23.orig/Makefile
+++ linux-2.6.23/Makefile
@@ -491,6 +491,10 @@ endif
 
 include $(srctree)/arch/$(ARCH)/Makefile
 
+ifdef CONFIG_IPIPE_TRACE_MCOUNT
+CFLAGS          += -pg
+endif
+
 ifdef CONFIG_FRAME_POINTER
 CFLAGS		+= -fno-omit-frame-pointer -fno-optimize-sibling-calls
 else
Index: linux-2.6.23/arch/i386/Kconfig
===================================================================
--- linux-2.6.23.orig/arch/i386/Kconfig
+++ linux-2.6.23/arch/i386/Kconfig
@@ -217,7 +217,7 @@ endchoice
 config PARAVIRT
 	bool "Paravirtualization support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
-	depends on !(X86_VISWS || X86_VOYAGER)
+	depends on !(X86_VISWS || X86_VOYAGER || IPIPE)
 	help
 	  Paravirtualization is a way of running multiple instances of
 	  Linux on the same machine, under a hypervisor.  This option
@@ -315,6 +315,8 @@ config SCHED_MC
 
 source "kernel/Kconfig.preempt"
 
+source "kernel/ipipe/Kconfig"
+
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
 	depends on !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH)
Index: linux-2.6.23/arch/i386/kernel/Makefile
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/Makefile
+++ linux-2.6.23/arch/i386/kernel/Makefile
@@ -32,6 +32,8 @@ obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit.
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_MODULES)		+= module.o
 obj-y				+= sysenter.o vsyscall.o
+obj-$(CONFIG_IPIPE)		+= ipipe.o
+obj-$(CONFIG_IPIPE_TRACE_MCOUNT)	+= mcount.o
 obj-$(CONFIG_ACPI_SRAT) 	+= srat.o
 obj-$(CONFIG_EFI) 		+= efi.o efi_stub.o
 obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault.o
Index: linux-2.6.23/arch/i386/kernel/apic.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/apic.c
+++ linux-2.6.23/arch/i386/kernel/apic.c
@@ -250,7 +250,7 @@ static void lapic_timer_setup(enum clock
 	if (!local_apic_timer_verify_ok)
 		return;
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 
 	switch (mode) {
 	case CLOCK_EVT_MODE_PERIODIC:
@@ -269,7 +269,7 @@ static void lapic_timer_setup(enum clock
 		break;
 	}
 
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 /*
@@ -716,13 +716,18 @@ void lapic_shutdown(void)
 	if (!cpu_has_apic)
 		return;
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	clear_local_APIC();
 
 	if (enabled_via_apicbase)
 		disable_local_APIC();
 
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
+}
+
+int __ipipe_check_lapic(void)
+{
+	return !(lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY);
 }
 
 /*
@@ -1272,7 +1277,7 @@ void smp_spurious_interrupt(struct pt_re
 	 */
 	v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
 	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
-		ack_APIC_irq();
+		__ack_APIC_irq();
 
 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
 	printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
@@ -1329,6 +1334,12 @@ void __init apic_intr_init(void)
 #ifdef CONFIG_X86_MCE_P4THERMAL
 	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 #endif
+#ifdef CONFIG_IPIPE
+	set_intr_gate(IPIPE_SERVICE_VECTOR0, ipipe_ipi0);
+	set_intr_gate(IPIPE_SERVICE_VECTOR1, ipipe_ipi1);
+	set_intr_gate(IPIPE_SERVICE_VECTOR2, ipipe_ipi2);
+	set_intr_gate(IPIPE_SERVICE_VECTOR3, ipipe_ipi3);
+#endif
 }
 
 /**
@@ -1467,9 +1478,9 @@ static int lapic_suspend(struct sys_devi
 		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
 #endif
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	disable_local_APIC();
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 	return 0;
 }
 
@@ -1484,7 +1495,7 @@ static int lapic_resume(struct sys_devic
 
 	maxlvt = lapic_get_maxlvt();
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 
 	/*
 	 * Make sure the APICBASE points to the right address
@@ -1519,7 +1530,7 @@ static int lapic_resume(struct sys_devic
 	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
 	apic_write(APIC_ESR, 0);
 	apic_read(APIC_ESR);
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 	return 0;
 }
 
Index: linux-2.6.23/arch/i386/kernel/entry.S
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/entry.S
+++ linux-2.6.23/arch/i386/kernel/entry.S
@@ -44,6 +44,7 @@
 #include <linux/linkage.h>
 #include <asm/thread_info.h>
 #include <asm/irqflags.h>
+#include <asm/ipipe_base.h>
 #include <asm/errno.h>
 #include <asm/segment.h>
 #include <asm/smp.h>
@@ -75,6 +76,58 @@ DF_MASK		= 0x00000400 
 NT_MASK		= 0x00004000
 VM_MASK		= 0x00020000
 
+#ifdef CONFIG_IPIPE
+#define EMULATE_ROOT_IRET(bypass) \
+				call __ipipe_unstall_iret_root ; \
+				TRACE_IRQS_ON ; \
+				bypass: \
+				movl PT_EAX(%esp),%eax
+#define TEST_PREEMPTIBLE(regs)  call __ipipe_kpreempt_root ; testl %eax,%eax
+#define CATCH_ROOT_SYSCALL(bypass1,bypass2)	\
+				call __ipipe_syscall_root ; \
+				testl  %eax,%eax ; \
+				js    bypass1 ; \
+				jne   bypass2 ; \
+				movl PT_ORIG_EAX(%esp),%eax
+#define PUSH_XCODE(v)		pushl $ ex_ ## v
+#define PUSH_XVEC(v)		pushl $ ex_ ## v
+#define HANDLE_EXCEPTION(code)	movl %code,%ecx ; \
+				call __ipipe_handle_exception ; \
+				testl %eax,%eax	; \
+				jnz restore_nocheck_notrace
+#define DIVERT_EXCEPTION(code)	movl $(__USER_DS), %ecx	; \
+				movl %ecx, %ds ; \
+				movl %ecx, %es ; \
+				movl %esp, %eax	; \
+				movl $ex_ ## code,%edx ; \
+				call __ipipe_divert_exception ; \
+				testl %eax,%eax	; \
+				jnz restore_nocheck_notrace
+
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+# define IPIPE_TRACE_IRQ_ENTER \
+	lea PT_EIP-4(%esp), %ebp; \
+	movl PT_ORIG_EAX(%esp), %eax; \
+	call ipipe_trace_begin
+# define IPIPE_TRACE_IRQ_EXIT \
+	pushl %eax; \
+	movl PT_ORIG_EAX+4(%esp), %eax; \
+	call ipipe_trace_end; \
+	popl %eax
+#else  /* !CONFIG_IPIPE_TRACE_IRQSOFF */
+#define IPIPE_TRACE_IRQ_ENTER
+#define IPIPE_TRACE_IRQ_EXIT
+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
+#else /* !CONFIG_IPIPE */
+#define EMULATE_ROOT_IRET(bypass)
+#define TEST_PREEMPTIBLE(regs)		testl $IF_MASK,PT_EFLAGS(regs)
+#define CATCH_ROOT_SYSCALL(bypass1,bypass2)
+#define PUSH_XCODE(v)			pushl $v
+#define PUSH_XVEC(v)			pushl v
+#define HANDLE_EXCEPTION(code)		call *%code
+#define DIVERT_EXCEPTION(code)
+#endif /* CONFIG_IPIPE */
+
 #ifdef CONFIG_PREEMPT
 #define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
 #else
@@ -214,6 +267,7 @@ VM_MASK		= 0x00020000
 	CFI_OFFSET ebx, PT_EBX-PT_OLDESP
 
 ENTRY(ret_from_fork)
+	ENABLE_INTERRUPTS_HW_COND
 	CFI_STARTPROC
 	pushl %eax
 	CFI_ADJUST_CFA_OFFSET 4
@@ -241,7 +295,7 @@ END(ret_from_fork)
 	RING0_PTREGS_FRAME
 ret_from_exception:
 	preempt_stop(CLBR_ANY)
-ret_from_intr:
+ENTRY(ret_from_intr)
 	GET_THREAD_INFO(%ebp)
 check_userspace:
 	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
@@ -263,14 +317,14 @@ END(ret_from_exception)
 
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
-	DISABLE_INTERRUPTS(CLBR_ANY)
+	DISABLE_INTERRUPTS_HW(CLBR_ANY)
 	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
 	jnz restore_nocheck
 need_resched:
 	movl TI_flags(%ebp), %ecx	# need_resched set ?
 	testb $_TIF_NEED_RESCHED, %cl
 	jz restore_all
-	testl $IF_MASK,PT_EFLAGS(%esp)	# interrupts off (exception path) ?
+ 	TEST_PREEMPTIBLE(%esp)		# interrupts off (exception path) ?
 	jz restore_all
 	call preempt_schedule_irq
 	jmp need_resched
@@ -293,7 +347,7 @@ sysenter_past_esp:
 	 * No need to follow this irqs on/off section: the syscall
 	 * disabled irqs and here we enable it straight after entry:
 	 */
-	ENABLE_INTERRUPTS(CLBR_NONE)
+	ENABLE_INTERRUPTS_HW(CLBR_NONE)
 	pushl $(__USER_DS)
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET ss, 0*/
@@ -330,6 +384,7 @@ sysenter_past_esp:
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
+	CATCH_ROOT_SYSCALL(sysenter_tail,sysenter_exit)
 
 	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
 	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
@@ -338,16 +393,20 @@ sysenter_past_esp:
 	jae syscall_badsys
 	call *sys_call_table(,%eax,4)
 	movl %eax,PT_EAX(%esp)
+sysenter_tail:
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx
 	jne syscall_exit_work
 /* if something modifies registers it must also disable sysexit */
+	EMULATE_ROOT_IRET(sysenter_exit)
 	movl PT_EIP(%esp), %edx
 	movl PT_OLDESP(%esp), %ecx
 	xorl %ebp,%ebp
-	TRACE_IRQS_ON
+#ifndef CONFIG_IPIPE
+  	TRACE_IRQS_ON
+#endif
 1:	mov  PT_FS(%esp), %fs
 	ENABLE_INTERRUPTS_SYSEXIT
 	CFI_ENDPROC
@@ -367,6 +426,7 @@ ENTRY(system_call)
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
+ 	CATCH_ROOT_SYSCALL(syscall_exit,restore_nocheck_notrace)
 					# system call tracing in operation / emulation
 	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
 	testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
@@ -401,7 +461,11 @@ restore_all:
 	CFI_REMEMBER_STATE
 	je ldt_ss			# returning to user-space with LDT SS
 restore_nocheck:
+#ifdef CONFIG_IPIPE
+	call __ipipe_unstall_iret_root
+#else /* !CONFIG_IPIPE */
 	TRACE_IRQS_IRET
+#endif /* CONFIG_IPIPE */
 restore_nocheck_notrace:
 	RESTORE_REGS
 	addl $4, %esp			# skip orig_eax/error_code
@@ -410,7 +474,7 @@ restore_nocheck_notrace:
 .section .fixup,"ax"
 iret_exc:
 	pushl $0			# no error code
-	pushl $do_iret_error
+	PUSH_XCODE(do_iret_error)
 	jmp error_code
 .previous
 .section __ex_table,"a"
@@ -451,7 +515,7 @@ ldt_ss:
 	CFI_ADJUST_CFA_OFFSET 4
 	pushl %eax
 	CFI_ADJUST_CFA_OFFSET 4
-	DISABLE_INTERRUPTS(CLBR_EAX)
+	DISABLE_INTERRUPTS_HW(CLBR_EAX)
 	TRACE_IRQS_OFF
 	lss (%esp), %esp
 	CFI_ADJUST_CFA_OFFSET -8
@@ -466,6 +530,7 @@ work_pending:
 	testb $_TIF_NEED_RESCHED, %cl
 	jz work_notifysig
 work_resched:
+	ENABLE_INTERRUPTS_HW_COND
 	call schedule
 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
@@ -605,6 +670,47 @@ END(irq_entries_start)
 END(interrupt)
 .previous
 
+#ifdef CONFIG_IPIPE
+	ALIGN
+common_interrupt:
+	SAVE_ALL
+	IPIPE_TRACE_IRQ_ENTER
+	call __ipipe_handle_irq
+	IPIPE_TRACE_IRQ_EXIT
+	testl %eax,%eax
+	jnz  ret_from_intr
+	RESTORE_REGS
+	addl $4, %esp
+	iret
+	CFI_ENDPROC
+
+#define BUILD_INTERRUPT(name, nr)	\
+ENTRY(name)				\
+	RING0_INT_FRAME;		\
+	pushl $~(nr-FIRST_SYSTEM_VECTOR+NR_IRQS);\
+	CFI_ADJUST_CFA_OFFSET 4;	\
+	SAVE_ALL;			\
+	IPIPE_TRACE_IRQ_ENTER;	\
+	call __ipipe_handle_irq;	\
+	IPIPE_TRACE_IRQ_EXIT;		\
+	testl %eax,%eax;		\
+	jnz  ret_from_intr;		\
+	RESTORE_REGS;			\
+	addl $4, %esp;			\
+	iret;			 	\
+	CFI_ENDPROC
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	BUILD_INTERRUPT(ipipe_ipi0,IPIPE_SERVICE_VECTOR0)
+	BUILD_INTERRUPT(ipipe_ipi1,IPIPE_SERVICE_VECTOR1)
+	BUILD_INTERRUPT(ipipe_ipi2,IPIPE_SERVICE_VECTOR2)
+	BUILD_INTERRUPT(ipipe_ipi3,IPIPE_SERVICE_VECTOR3)
+#ifdef CONFIG_SMP
+	BUILD_INTERRUPT(ipipe_ipiX,IPIPE_CRITICAL_VECTOR)
+#endif
+#endif
+
+#else /* !CONFIG_IPIPE */
 /*
  * the CPU automatically disables interrupts when executing an IRQ vector,
  * so IRQ-flags tracing has to follow that:
@@ -632,12 +738,14 @@ ENTRY(name)				\
 	CFI_ENDPROC;			\
 ENDPROC(name)
 
+#endif /* !CONFIG_IPIPE */
+
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 
 KPROBE_ENTRY(page_fault)
 	RING0_EC_FRAME
-	pushl $do_page_fault
+	PUSH_XCODE(do_page_fault)
 	CFI_ADJUST_CFA_OFFSET 4
 	ALIGN
 error_code:
@@ -688,7 +796,7 @@ error_code:
 	movl %ecx, %ds
 	movl %ecx, %es
 	movl %esp,%eax			# pt_regs pointer
-	call *%edi
+	HANDLE_EXCEPTION(edi)
 	jmp ret_from_exception
 	CFI_ENDPROC
 KPROBE_END(page_fault)
@@ -697,7 +805,7 @@ ENTRY(coprocessor_error)
 	RING0_INT_FRAME
 	pushl $0
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_coprocessor_error
+	PUSH_XCODE(do_coprocessor_error)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -707,7 +815,7 @@ ENTRY(simd_coprocessor_error)
 	RING0_INT_FRAME
 	pushl $0
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_simd_coprocessor_error
+	PUSH_XCODE(do_simd_coprocessor_error)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -718,6 +826,7 @@ ENTRY(device_not_available)
 	pushl $-1			# mark this as an int
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
+	DIVERT_EXCEPTION(device_not_available)
 	GET_CR0_INTO_EAX
 	testl $0x4, %eax		# EM (math emulation bit)
 	jne device_not_available_emulate
@@ -771,6 +880,7 @@ debug_stack_correct:
 	pushl $-1			# mark this as an int
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
+	DIVERT_EXCEPTION(do_debug)
 	xorl %edx,%edx			# error code 0
 	movl %esp,%eax			# pt_regs pointer
 	call do_debug
@@ -889,6 +999,7 @@ KPROBE_ENTRY(int3)
 	pushl $-1			# mark this as an int
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
+	DIVERT_EXCEPTION(do_int3)
 	xorl %edx,%edx		# zero error code
 	movl %esp,%eax		# pt_regs pointer
 	call do_int3
@@ -900,7 +1011,7 @@ ENTRY(overflow)
 	RING0_INT_FRAME
 	pushl $0
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_overflow
+	PUSH_XCODE(do_overflow)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -910,7 +1021,7 @@ ENTRY(bounds)
 	RING0_INT_FRAME
 	pushl $0
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_bounds
+	PUSH_XCODE(do_bounds)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -920,7 +1031,7 @@ ENTRY(invalid_op)
 	RING0_INT_FRAME
 	pushl $0
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_invalid_op
+	PUSH_XCODE(do_invalid_op)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -930,7 +1041,7 @@ ENTRY(coprocessor_segment_overrun)
 	RING0_INT_FRAME
 	pushl $0
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_coprocessor_segment_overrun
+ 	PUSH_XCODE(do_coprocessor_segment_overrun)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -938,7 +1049,7 @@ END(coprocessor_segment_overrun)
 
 ENTRY(invalid_TSS)
 	RING0_EC_FRAME
-	pushl $do_invalid_TSS
+ 	PUSH_XCODE(do_invalid_TSS)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -946,7 +1057,7 @@ END(invalid_TSS)
 
 ENTRY(segment_not_present)
 	RING0_EC_FRAME
-	pushl $do_segment_not_present
+ 	PUSH_XCODE(do_segment_not_present)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -954,7 +1065,7 @@ END(segment_not_present)
 
 ENTRY(stack_segment)
 	RING0_EC_FRAME
-	pushl $do_stack_segment
+ 	PUSH_XCODE(do_stack_segment)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -962,7 +1073,7 @@ END(stack_segment)
 
 KPROBE_ENTRY(general_protection)
 	RING0_EC_FRAME
-	pushl $do_general_protection
+ 	PUSH_XCODE(do_general_protection)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -970,7 +1081,7 @@ KPROBE_END(general_protection)
 
 ENTRY(alignment_check)
 	RING0_EC_FRAME
-	pushl $do_alignment_check
+	PUSH_XCODE(do_alignment_check)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -980,7 +1091,7 @@ ENTRY(divide_error)
 	RING0_INT_FRAME
 	pushl $0			# no error code
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_divide_error
+	PUSH_XCODE(do_divide_error)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -991,7 +1102,7 @@ ENTRY(machine_check)
 	RING0_INT_FRAME
 	pushl $0
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl machine_check_vector
+	PUSH_XVEC(machine_check_vector)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
@@ -1002,7 +1113,7 @@ ENTRY(spurious_interrupt_bug)
 	RING0_INT_FRAME
 	pushl $0
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_spurious_interrupt_bug
+	PUSH_XCODE(do_spurious_interrupt_bug)
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	CFI_ENDPROC
Index: linux-2.6.23/arch/i386/kernel/i8253.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/i8253.c
+++ linux-2.6.23/arch/i386/kernel/i8253.c
@@ -4,6 +4,7 @@
  */
 #include <linux/clockchips.h>
 #include <linux/init.h>
+#include <linux/ipipe.h>
 #include <linux/interrupt.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
@@ -15,7 +16,7 @@
 #include <asm/io.h>
 #include <asm/timer.h>
 
-DEFINE_SPINLOCK(i8253_lock);
+IPIPE_DEFINE_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
 
 /*
@@ -133,6 +134,12 @@ static cycle_t pit_read(void)
 	static int old_count;
 	static u32 old_jifs;
 
+#ifdef CONFIG_IPIPE
+	if (!__ipipe_pipeline_head_p(ipipe_root_domain))
+		/* We don't really own the PIT. */
+		return (cycle_t)(jiffies * LATCH) + (LATCH - 1) - old_count;
+#endif /* CONFIG_IPIPE */
+
 	spin_lock_irqsave(&i8253_lock, flags);
 	/*
 	 * Although our caller may have the read side of xtime_lock,
Index: linux-2.6.23/arch/i386/kernel/i8259.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/i8259.c
+++ linux-2.6.23/arch/i386/kernel/i8259.c
@@ -34,7 +34,7 @@
  */
 
 static int i8259A_auto_eoi;
-DEFINE_SPINLOCK(i8259A_lock);
+IPIPE_DEFINE_SPINLOCK(i8259A_lock);
 static void mask_and_ack_8259A(unsigned int);
 
 static struct irq_chip i8259A_chip = {
@@ -71,6 +71,7 @@ void disable_8259A_irq(unsigned int irq)
 	unsigned long flags;
 
 	spin_lock_irqsave(&i8259A_lock, flags);
+	ipipe_irq_lock(irq);
 	cached_irq_mask |= mask;
 	if (irq & 8)
 		outb(cached_slave_mask, PIC_SLAVE_IMR);
@@ -81,15 +82,18 @@ void disable_8259A_irq(unsigned int irq)
 
 void enable_8259A_irq(unsigned int irq)
 {
-	unsigned int mask = ~(1 << irq);
+	unsigned int mask = (1 << irq);
 	unsigned long flags;
 
 	spin_lock_irqsave(&i8259A_lock, flags);
-	cached_irq_mask &= mask;
-	if (irq & 8)
-		outb(cached_slave_mask, PIC_SLAVE_IMR);
-	else
-		outb(cached_master_mask, PIC_MASTER_IMR);
+	if (cached_irq_mask & mask) {
+		cached_irq_mask &= ~mask;
+		if (irq & 8)
+			outb(cached_slave_mask, PIC_SLAVE_IMR);
+		else
+			outb(cached_master_mask, PIC_MASTER_IMR);
+		ipipe_irq_unlock(irq);
+	}
 	spin_unlock_irqrestore(&i8259A_lock, flags);
 }
 
@@ -170,6 +174,15 @@ static void mask_and_ack_8259A(unsigned 
 	 */
 	if (cached_irq_mask & irqmask)
 		goto spurious_8259A_irq;
+#ifdef CONFIG_IPIPE
+	if (irq == 0) {
+	    /* Fast timer ack -- don't mask (unless supposedly
+	      spurious) */
+	    outb(0x60, PIC_MASTER_CMD);	/* Specific EOI to master. */
+	    spin_unlock_irqrestore(&i8259A_lock, flags);
+	    return;
+	}
+#endif /* CONFIG_IPIPE */
 	cached_irq_mask |= irqmask;
 
 handle_real_irq:
Index: linux-2.6.23/arch/i386/kernel/io_apic.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/io_apic.c
+++ linux-2.6.23/arch/i386/kernel/io_apic.c
@@ -56,8 +56,8 @@ atomic_t irq_mis_count;
 /* Where if anywhere is the i8259 connect in external int mode */
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
-static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
+static IPIPE_DEFINE_SPINLOCK(ioapic_lock);
+static IPIPE_DEFINE_SPINLOCK(vector_lock);
 
 int timer_over_8254 __initdata = 1;
 
@@ -278,6 +278,7 @@ static void mask_IO_APIC_irq (unsigned i
 	unsigned long flags;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
+	ipipe_irq_lock(irq);
 	__mask_IO_APIC_irq(irq);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
@@ -288,6 +289,7 @@ static void unmask_IO_APIC_irq (unsigned
 
 	spin_lock_irqsave(&ioapic_lock, flags);
 	__unmask_IO_APIC_irq(irq);
+	ipipe_irq_unlock(irq);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
@@ -729,8 +731,10 @@ late_initcall(balanced_irq_init);
 #ifndef CONFIG_SMP
 void fastcall send_IPI_self(int vector)
 {
+	unsigned long flags;
 	unsigned int cfg;
 
+	local_irq_save_hw_cond(flags);
 	/*
 	 * Wait for idle.
 	 */
@@ -740,6 +744,7 @@ void fastcall send_IPI_self(int vector)
 	 * Send the IPI. The write to APIC_ICR fires this off.
 	 */
 	apic_write_around(APIC_ICR, cfg);
+	local_irq_restore_hw_cond(flags);
 }
 #endif /* !CONFIG_SMP */
 
@@ -1939,6 +1944,7 @@ static unsigned int startup_ioapic_irq(u
 			was_pending = 1;
 	}
 	__unmask_IO_APIC_irq(irq);
+	ipipe_irq_unlock(irq);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return was_pending;
@@ -1946,8 +1952,10 @@ static unsigned int startup_ioapic_irq(u
 
 static void ack_ioapic_irq(unsigned int irq)
 {
+#ifndef CONFIG_IPIPE
 	move_native_irq(irq);
-	ack_APIC_irq();
+#endif /* CONFIG_IPIPE */
+	__ack_APIC_irq();
 }
 
 static void ack_ioapic_quirk_irq(unsigned int irq)
@@ -1955,7 +1963,9 @@ static void ack_ioapic_quirk_irq(unsigne
 	unsigned long v;
 	int i;
 
+#ifndef CONFIG_IPIPE
 	move_native_irq(irq);
+#endif /* CONFIG_IPIPE */
 /*
  * It appears there is an erratum which affects at least version 0x11
  * of I/O APIC (that's the 82093AA and cores integrated into various
@@ -1979,7 +1989,7 @@ static void ack_ioapic_quirk_irq(unsigne
 
 	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 
-	ack_APIC_irq();
+	__ack_APIC_irq();
 
 	if (!(v & (1 << (i & 0x1f)))) {
 		atomic_inc(&irq_mis_count);
@@ -1988,6 +1998,17 @@ static void ack_ioapic_quirk_irq(unsigne
 		__unmask_and_level_IO_APIC_irq(irq);
 		spin_unlock(&ioapic_lock);
 	}
+
+#ifdef CONFIG_IPIPE
+/*
+ * Prevent low priority IRQs grabbed by high priority domains from
+ * being delayed, waiting for a high priority interrupt handler
+ * running in a low priority domain to complete.
+ */
+	spin_lock(&ioapic_lock);
+	__mask_IO_APIC_irq(irq);
+	spin_unlock(&ioapic_lock);
+#endif
 }
 
 static int ioapic_retrigger_irq(unsigned int irq)
@@ -2049,23 +2070,29 @@ static inline void init_IO_APIC_traps(vo
 
 static void ack_apic(unsigned int irq)
 {
-	ack_APIC_irq();
+	__ack_APIC_irq();
 }
 
 static void mask_lapic_irq (unsigned int irq)
 {
-	unsigned long v;
+	unsigned long v, flags;
 
+	spin_lock_irqsave(&ioapic_lock, flags);
+	ipipe_irq_lock(irq);
 	v = apic_read(APIC_LVT0);
 	apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 static void unmask_lapic_irq (unsigned int irq)
 {
-	unsigned long v;
+	unsigned long v, flags;
 
+	spin_lock_irqsave(&ioapic_lock, flags);
 	v = apic_read(APIC_LVT0);
 	apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+	ipipe_irq_unlock(irq);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
 static struct irq_chip lapic_chip __read_mostly = {
Index: linux-2.6.23/arch/i386/kernel/ipipe.c
===================================================================
--- /dev/null
+++ linux-2.6.23/arch/i386/kernel/ipipe.c
@@ -0,0 +1,817 @@
+/*   -*- linux-c -*-
+ *   linux/arch/i386/kernel/ipipe.c
+ *
+ *   Copyright (C) 2002-2007 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *   Architecture-dependent I-PIPE support for x86.
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/irq.h>
+#include <linux/clockchips.h>
+#include <asm/unistd.h>
+#include <asm/system.h>
+#include <asm/atomic.h>
+#include <asm/hw_irq.h>
+#include <asm/irq.h>
+#include <asm/desc.h>
+#include <asm/io.h>
+#ifdef CONFIG_X86_LOCAL_APIC
+#include <asm/tlbflush.h>
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <asm/mpspec.h>
+#ifdef CONFIG_X86_IO_APIC
+#include <asm/io_apic.h>
+#endif	/* CONFIG_X86_IO_APIC */
+#include <asm/apic.h>
+#include <mach_ipi.h>
+#endif	/* CONFIG_X86_LOCAL_APIC */
+
+extern struct clock_event_device *global_clock_event;
+
+extern struct clock_event_device pit_clockevent;
+
+int __ipipe_tick_irq;
+
+#ifdef CONFIG_SMP
+
+static cpumask_t __ipipe_cpu_sync_map;
+
+static cpumask_t __ipipe_cpu_lock_map;
+
+static IPIPE_DEFINE_SPINLOCK(__ipipe_cpu_barrier);
+
+static atomic_t __ipipe_critical_count = ATOMIC_INIT(0);
+
+static void (*__ipipe_cpu_sync) (void);
+
+#endif /* CONFIG_SMP */
+
+/* ipipe_trigger_irq() -- Push the interrupt at front of the pipeline
+   just like if it has been actually received from a hw source. Also
+   works for virtual interrupts. */
+
+int fastcall ipipe_trigger_irq(unsigned irq)
+{
+	struct pt_regs regs;
+	unsigned long flags;
+
+	if (irq >= IPIPE_NR_IRQS ||
+	    (ipipe_virtual_irq_p(irq) &&
+	     !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)))
+		return -EINVAL;
+
+	local_irq_save_hw(flags);
+
+	regs.orig_eax = irq;	/* Won't be acked */
+	regs.xcs = __KERNEL_CS;
+	regs.eflags = flags;
+
+	__ipipe_handle_irq(regs);
+
+	local_irq_restore_hw(flags);
+
+	return 1;
+}
+
+int ipipe_get_sysinfo(struct ipipe_sysinfo *info)
+{
+	info->ncpus = num_online_cpus();
+	info->cpufreq = ipipe_cpu_freq();
+	info->archdep.tmirq = __ipipe_tick_irq;
+#ifdef CONFIG_X86_TSC
+	info->archdep.tmfreq = ipipe_cpu_freq();
+#else	/* !CONFIG_X86_TSC */
+	info->archdep.tmfreq = CLOCK_TICK_RATE;
+#endif	/* CONFIG_X86_TSC */
+
+	return 0;
+}
+
+fastcall unsigned int do_IRQ(struct pt_regs *regs);
+fastcall void smp_apic_timer_interrupt(struct pt_regs *regs);
+fastcall void smp_spurious_interrupt(struct pt_regs *regs);
+fastcall void smp_error_interrupt(struct pt_regs *regs);
+fastcall void smp_thermal_interrupt(struct pt_regs *regs);
+fastcall void smp_reschedule_interrupt(struct pt_regs *regs);
+fastcall void smp_invalidate_interrupt(struct pt_regs *regs);
+fastcall void smp_call_function_interrupt(struct pt_regs *regs);
+
+static int __ipipe_ack_irq(unsigned irq)
+{
+	irq_desc_t *desc = irq_desc + irq;
+	desc->ipipe_ack(irq, desc);
+	return 1;
+}
+
+void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq)
+{
+	irq_desc[irq].status &= ~IRQ_DISABLED;
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+static int __ipipe_noack_apic(unsigned irq)
+{
+	return 1;
+}
+
+int __ipipe_ack_apic(unsigned irq)
+{
+	__ack_APIC_irq();
+	return 1;
+}
+
+static void __ipipe_null_handler(unsigned irq, void *cookie)
+{
+}
+
+#endif	/* CONFIG_X86_LOCAL_APIC */
+
+/* __ipipe_enable_pipeline() -- We are running on the boot CPU, hw
+   interrupts are off, and secondary CPUs are still lost in space. */
+
+void __init __ipipe_enable_pipeline(void)
+{
+	unsigned irq;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+	/* Map the APIC system vectors. */
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR),
+			     (ipipe_irq_handler_t)&smp_apic_timer_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(SPURIOUS_APIC_VECTOR),
+			     (ipipe_irq_handler_t)&smp_spurious_interrupt,
+			     NULL,
+			     &__ipipe_noack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(ERROR_APIC_VECTOR),
+			     (ipipe_irq_handler_t)&smp_error_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0),
+			     &__ipipe_null_handler,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1),
+			     &__ipipe_null_handler,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2),
+			     &__ipipe_null_handler,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3),
+			     &__ipipe_null_handler,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+#ifdef CONFIG_X86_MCE_P4THERMAL
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(THERMAL_APIC_VECTOR),
+			     (ipipe_irq_handler_t)&smp_thermal_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+#endif /* CONFIG_X86_MCE_P4THERMAL */
+
+	__ipipe_tick_irq = global_clock_event == &pit_clockevent ? 0
+		: ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR);
+
+#else	/* !CONFIG_X86_LOCAL_APIC */
+
+	__ipipe_tick_irq = 0;
+
+#endif	/* CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_SMP
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(RESCHEDULE_VECTOR),
+			     (ipipe_irq_handler_t)&smp_reschedule_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(INVALIDATE_TLB_VECTOR),
+			     (ipipe_irq_handler_t)&smp_invalidate_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+	ipipe_virtualize_irq(ipipe_root_domain,
+			     ipipe_apic_vector_irq(CALL_FUNCTION_VECTOR),
+			     (ipipe_irq_handler_t)&smp_call_function_interrupt,
+			     NULL,
+			     &__ipipe_ack_apic,
+			     IPIPE_STDROOT_MASK);
+
+#endif	/* CONFIG_SMP */
+
+	/* Finally, virtualize the remaining ISA and IO-APIC
+	 * interrupts. Interrupts which have already been virtualized
+	 * will just beget a silent -EPERM error since
+	 * IPIPE_SYSTEM_MASK has been passed for them, that's ok. */
+
+	for (irq = 0; irq < NR_IRQS; irq++)
+		/* Fails for IPIPE_CRITICAL_IPI but that's ok. */
+		ipipe_virtualize_irq(ipipe_root_domain,
+				     irq,
+				     (ipipe_irq_handler_t)&do_IRQ,
+				     NULL,
+				     &__ipipe_ack_irq,
+				     IPIPE_STDROOT_MASK);
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	/* Eventually allow these vectors to be reprogrammed. */
+	ipipe_root_domain->irqs[IPIPE_SERVICE_IPI0].control &= ~IPIPE_SYSTEM_MASK;
+	ipipe_root_domain->irqs[IPIPE_SERVICE_IPI1].control &= ~IPIPE_SYSTEM_MASK;
+	ipipe_root_domain->irqs[IPIPE_SERVICE_IPI2].control &= ~IPIPE_SYSTEM_MASK;
+	ipipe_root_domain->irqs[IPIPE_SERVICE_IPI3].control &= ~IPIPE_SYSTEM_MASK;
+#endif	/* CONFIG_X86_LOCAL_APIC */
+}
+
+#ifdef CONFIG_SMP
+
+cpumask_t __ipipe_set_irq_affinity (unsigned irq, cpumask_t cpumask)
+{
+	cpumask_t oldmask = irq_desc[irq].affinity;
+
+	if (irq_desc[irq].chip->set_affinity == NULL)
+		return CPU_MASK_NONE;
+
+	if (cpus_empty(cpumask))
+		return oldmask; /* Return mask value -- no change. */
+
+	cpus_and(cpumask,cpumask,cpu_online_map);
+
+	if (cpus_empty(cpumask))
+		return CPU_MASK_NONE;	/* Error -- bad mask value or non-routable IRQ. */
+
+	irq_desc[irq].chip->set_affinity(irq,cpumask);
+
+	return oldmask;
+}
+
+int fastcall __ipipe_send_ipi (unsigned ipi, cpumask_t cpumask)
+{
+	unsigned long flags;
+	int self;
+
+	if (ipi != IPIPE_SERVICE_IPI0 &&
+	    ipi != IPIPE_SERVICE_IPI1 &&
+	    ipi != IPIPE_SERVICE_IPI2 &&
+	    ipi != IPIPE_SERVICE_IPI3)
+		return -EINVAL;
+
+	local_irq_save_hw(flags);
+
+	self = cpu_isset(ipipe_processor_id(),cpumask);
+	cpu_clear(ipipe_processor_id(), cpumask);
+
+	if (!cpus_empty(cpumask))
+		send_IPI_mask(cpumask,ipipe_apic_irq_vector(ipi));
+
+	if (self)
+		ipipe_trigger_irq(ipi);
+
+	local_irq_restore_hw(flags);
+
+	return 0;
+}
+
+/* Always called with hw interrupts off. */
+
+void __ipipe_do_critical_sync(unsigned irq, void *cookie)
+{
+	int cpu = ipipe_processor_id();
+
+	cpu_set(cpu, __ipipe_cpu_sync_map);
+
+	/* Now we are in sync with the lock requestor running on another
+	   CPU. Enter a spinning wait until he releases the global
+	   lock. */
+	spin_lock(&__ipipe_cpu_barrier);
+
+	/* Got it. Now get out. */
+
+	if (__ipipe_cpu_sync)
+		/* Call the sync routine if any. */
+		__ipipe_cpu_sync();
+
+	spin_unlock(&__ipipe_cpu_barrier);
+
+	cpu_clear(cpu, __ipipe_cpu_sync_map);
+}
+
+void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd)
+{
+	ipd->irqs[IPIPE_CRITICAL_IPI].acknowledge = &__ipipe_ack_apic;
+	ipd->irqs[IPIPE_CRITICAL_IPI].handler = &__ipipe_do_critical_sync;
+	ipd->irqs[IPIPE_CRITICAL_IPI].cookie = NULL;
+	/* Immediately handle in the current domain but *never* pass */
+	ipd->irqs[IPIPE_CRITICAL_IPI].control =
+		IPIPE_HANDLE_MASK|IPIPE_STICKY_MASK|IPIPE_SYSTEM_MASK;
+}
+
+#endif	/* CONFIG_SMP */
+
+/* ipipe_critical_enter() -- Grab the superlock excluding all CPUs
+   but the current one from a critical section. This lock is used when
+   we must enforce a global critical section for a single CPU in a
+   possibly SMP system whichever context the CPUs are running. */
+
+unsigned long ipipe_critical_enter(void (*syncfn) (void))
+{
+	unsigned long flags;
+
+	local_irq_save_hw(flags);
+
+#ifdef CONFIG_SMP
+	if (unlikely(num_online_cpus() == 1))	/* We might be running a SMP-kernel on a UP box... */
+		return flags;
+
+	{
+		int cpu = ipipe_processor_id();
+		cpumask_t lock_map;
+
+		if (!cpu_test_and_set(cpu, __ipipe_cpu_lock_map)) {
+			while (cpu_test_and_set(BITS_PER_LONG - 1, __ipipe_cpu_lock_map)) {
+				int n = 0;
+				do {
+					cpu_relax();
+				} while (++n < cpu);
+			}
+
+			spin_lock(&__ipipe_cpu_barrier);
+
+			__ipipe_cpu_sync = syncfn;
+
+			/* Send the sync IPI to all processors but the current one. */
+			send_IPI_allbutself(IPIPE_CRITICAL_VECTOR);
+
+			cpus_andnot(lock_map, cpu_online_map, __ipipe_cpu_lock_map);
+
+			while (!cpus_equal(__ipipe_cpu_sync_map, lock_map))
+				cpu_relax();
+		}
+
+		atomic_inc(&__ipipe_critical_count);
+	}
+#endif	/* CONFIG_SMP */
+
+	return flags;
+}
+
+/* ipipe_critical_exit() -- Release the superlock. */
+
+void ipipe_critical_exit(unsigned long flags)
+{
+#ifdef CONFIG_SMP
+	if (num_online_cpus() == 1)
+		goto out;
+
+	if (atomic_dec_and_test(&__ipipe_critical_count)) {
+		spin_unlock(&__ipipe_cpu_barrier);
+
+		while (!cpus_empty(__ipipe_cpu_sync_map))
+			cpu_relax();
+
+		cpu_clear(ipipe_processor_id(), __ipipe_cpu_lock_map);
+		cpu_clear(BITS_PER_LONG - 1, __ipipe_cpu_lock_map);
+	}
+out:
+#endif	/* CONFIG_SMP */
+
+	local_irq_restore_hw(flags);
+}
+
+static inline void __fixup_if(struct pt_regs *regs)
+{
+	if (!ipipe_root_domain_p)
+		return;
+
+	/*
+	 * Have the saved hw state look like the domain stall bit, so
+	 * that __ipipe_unstall_iret_root() restores the proper
+	 * pipeline state for the root stage upon exit.
+	 */
+
+	if (test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
+		regs->eflags &= ~X86_EFLAGS_IF;
+	else
+		regs->eflags |= X86_EFLAGS_IF;
+}
+
+/*  Check the stall bit of the root domain to make sure the existing
+    preemption opportunity upon in-kernel resumption could be
+    exploited. In case a rescheduling could take place, the root stage
+    is stalled before the hw interrupts are re-enabled. This routine
+    must be called with hw interrupts off. */
+
+asmlinkage int __ipipe_kpreempt_root(struct pt_regs regs)
+{
+	if (test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
+		/* Root stage is stalled: rescheduling denied. */
+		return 0;
+
+	__ipipe_stall_root();
+	local_irq_enable_hw_notrace();
+
+	return 1;	/* Ok, may reschedule now. */
+}
+
+asmlinkage void __ipipe_unstall_iret_root(struct pt_regs regs)
+{
+	/* Emulate IRET's handling of the interrupt flag. */
+
+	local_irq_disable_hw();
+
+	/* Restore the software state as it used to be on kernel
+	   entry. CAUTION: NMIs must *not* return through this
+	   emulation. */
+
+	if (!(regs.eflags & X86_EFLAGS_IF)) {
+		if (!__test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
+			trace_hardirqs_off();
+		regs.eflags |= X86_EFLAGS_IF;
+	} else {
+		if (test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status))) {
+			trace_hardirqs_on();
+			__clear_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status));
+		}
+
+		/* Only sync virtual IRQs here, so that we don't recurse
+		   indefinitely in case of an external interrupt flood. */
+
+		if ((ipipe_root_cpudom_var(irqpend_himask) & IPIPE_IRQMASK_VIRT) != 0)
+			__ipipe_sync_pipeline(IPIPE_IRQMASK_VIRT);
+	}
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+	ipipe_trace_end(0x8000000D);
+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
+}
+
+asmlinkage int __ipipe_syscall_root(struct pt_regs regs)
+{
+	unsigned long flags;
+
+	__fixup_if(&regs);
+
+	/* This routine either returns:
+	    0 -- if the syscall is to be passed to Linux;
+	   >0 -- if the syscall should not be passed to Linux, and no
+	   tail work should be performed;
+	   <0 -- if the syscall should not be passed to Linux but the
+	   tail work has to be performed (for handling signals etc). */
+
+	if (__ipipe_syscall_watched_p(current, regs.orig_eax) &&
+	    __ipipe_event_monitored_p(IPIPE_EVENT_SYSCALL) &&
+	    __ipipe_dispatch_event(IPIPE_EVENT_SYSCALL,&regs) > 0) {
+		/* We might enter here over a non-root domain and exit
+		 * over the root one as a result of the syscall
+		 * (i.e. by recycling the register set of the current
+		 * context across the migration), so we need to fixup
+		 * the interrupt flag upon return too, so that
+		 * __ipipe_unstall_iret_root() resets the correct
+		 * stall bit on exit. */
+		__fixup_if(&regs);
+
+		if (ipipe_root_domain_p && !in_atomic()) {
+			/* Sync pending VIRQs before _TIF_NEED_RESCHED is tested. */
+			local_irq_save_hw(flags);
+			if ((ipipe_root_cpudom_var(irqpend_himask) & IPIPE_IRQMASK_VIRT) != 0)
+				__ipipe_sync_pipeline(IPIPE_IRQMASK_VIRT);
+			local_irq_restore_hw(flags);
+			return -1;
+		}
+		return 1;
+	}
+
+    return 0;
+}
+
+static fastcall void do_machine_check_vector(struct pt_regs *regs, long error_code)
+{
+#ifdef CONFIG_X86_MCE
+	extern fastcall void (*machine_check_vector)(struct pt_regs *, long);
+	machine_check_vector(regs,error_code);
+#endif /* CONFIG_X86_MCE */
+}
+
+fastcall void do_divide_error(struct pt_regs *regs, long error_code);
+fastcall void do_overflow(struct pt_regs *regs, long error_code);
+fastcall void do_bounds(struct pt_regs *regs, long error_code);
+fastcall void do_invalid_op(struct pt_regs *regs, long error_code);
+fastcall void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code);
+fastcall void do_invalid_TSS(struct pt_regs *regs, long error_code);
+fastcall void do_segment_not_present(struct pt_regs *regs, long error_code);
+fastcall void do_stack_segment(struct pt_regs *regs, long error_code);
+fastcall void do_general_protection(struct pt_regs *regs, long error_code);
+fastcall void do_page_fault(struct pt_regs *regs, long error_code);
+fastcall void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code);
+fastcall void do_coprocessor_error(struct pt_regs *regs, long error_code);
+fastcall void do_alignment_check(struct pt_regs *regs, long error_code);
+fastcall void do_simd_coprocessor_error(struct pt_regs *regs, long error_code);
+fastcall void do_iret_error(struct pt_regs *regs, long error_code);
+
+/* Work around genksyms's issue with over-qualification in decls. */
+
+typedef fastcall void __ipipe_exhandler(struct pt_regs *, long);
+
+typedef __ipipe_exhandler *__ipipe_exptr;
+
+static __ipipe_exptr __ipipe_std_extable[] = {
+
+	[ex_do_divide_error] = &do_divide_error,
+	[ex_do_overflow] = &do_overflow,
+	[ex_do_bounds] = &do_bounds,
+	[ex_do_invalid_op] = &do_invalid_op,
+	[ex_do_coprocessor_segment_overrun] = &do_coprocessor_segment_overrun,
+	[ex_do_invalid_TSS] = &do_invalid_TSS,
+	[ex_do_segment_not_present] = &do_segment_not_present,
+	[ex_do_stack_segment] = &do_stack_segment,
+	[ex_do_general_protection] = do_general_protection,
+	[ex_do_page_fault] = &do_page_fault,
+	[ex_do_spurious_interrupt_bug] = &do_spurious_interrupt_bug,
+	[ex_do_coprocessor_error] = &do_coprocessor_error,
+	[ex_do_alignment_check] = &do_alignment_check,
+	[ex_machine_check_vector] = &do_machine_check_vector,
+	[ex_do_simd_coprocessor_error] = &do_simd_coprocessor_error,
+	[ex_do_iret_error] = &do_iret_error,
+};
+
+#ifdef CONFIG_KGDB
+#include <linux/kgdb.h>
+
+static int __ipipe_xlate_signo[] = {
+
+	[ex_do_divide_error] = SIGFPE,
+	[ex_do_debug] = SIGTRAP,
+	[2] = -1,
+	[ex_do_int3] = SIGTRAP,
+	[ex_do_overflow] = SIGSEGV,
+	[ex_do_bounds] = SIGSEGV,
+	[ex_do_invalid_op] = SIGILL,
+	[ex_device_not_available] = -1,
+	[8] = -1,
+	[ex_do_coprocessor_segment_overrun] = SIGFPE,
+	[ex_do_invalid_TSS] = SIGSEGV,
+	[ex_do_segment_not_present] = SIGBUS,
+	[ex_do_stack_segment] = SIGBUS,
+	[ex_do_general_protection] = SIGSEGV,
+	[ex_do_page_fault] = SIGSEGV,
+	[ex_do_spurious_interrupt_bug] = -1,
+	[ex_do_coprocessor_error] = -1,
+	[ex_do_alignment_check] = SIGBUS,
+	[ex_machine_check_vector] = -1,
+	[ex_do_simd_coprocessor_error] = -1,
+	[20 ... 31] = -1,
+	[ex_do_iret_error] = SIGSEGV,
+};
+#endif /* CONFIG_KGDB */
+
+fastcall int __ipipe_handle_exception(struct pt_regs *regs, long error_code, int vector)
+{
+	unsigned long flags;
+
+	local_save_flags(flags);
+
+	/* Track the hw interrupt state before calling the Linux
+	 * exception handler, replicating it into the virtual mask. */
+
+	if (irqs_disabled_hw()) {
+		/* Do not trigger the alarm in ipipe_check_context() by using
+		 * plain local_irq_disable(). */
+		__ipipe_stall_root();
+		trace_hardirqs_off();
+		barrier();
+	}
+
+#ifdef CONFIG_KGDB
+	/* catch exception KGDB is interested in over non-root domains */
+	if (!ipipe_root_domain_p &&
+	    __ipipe_xlate_signo[vector] >= 0 &&
+	    !kgdb_handle_exception(vector, __ipipe_xlate_signo[vector], error_code, regs)) {
+		local_irq_restore(flags);
+		return 1;
+	}
+#endif /* CONFIG_KGDB */
+
+	if (!ipipe_trap_notify(vector, regs)) {
+		__ipipe_exptr handler = __ipipe_std_extable[vector];
+		handler(regs,error_code);
+		local_irq_restore(flags);
+		__fixup_if(regs);
+		return 0;
+	}
+
+	local_irq_restore(flags);
+
+	return 1;
+}
+
+fastcall int __ipipe_divert_exception(struct pt_regs *regs, int vector)
+{
+#ifdef CONFIG_KGDB
+	/* catch int1 and int3 over non-root domains */
+	if (!ipipe_root_domain_p && vector != ex_device_not_available) {
+		unsigned int condition = 0;
+		if (vector == 1)
+			get_debugreg(condition, 6);
+		if (!kgdb_handle_exception(vector, SIGTRAP, condition, regs))
+			return 1;
+	}
+#endif /* CONFIG_KGDB */
+
+	if (ipipe_trap_notify(vector, regs))
+		return 1;
+
+	__fixup_if(regs);
+
+	return 0;
+}
+
+/* __ipipe_handle_irq() -- IPIPE's generic IRQ handler. An optimistic
+   interrupt protection log is maintained here for each domain.  Hw
+   interrupts are off on entry. */
+
+int __ipipe_handle_irq(struct pt_regs regs)
+{
+	struct ipipe_domain *this_domain, *next_domain;
+	unsigned irq = regs.orig_eax;
+	struct list_head *head, *pos;
+	int m_ack;
+
+	if ((long)regs.orig_eax < 0) {
+		irq = ~irq;
+		m_ack = 0;
+	} else /* This is a self-triggered interrupt. */
+		m_ack = 1;
+
+	head = __ipipe_pipeline.next;
+	next_domain = list_entry(head, struct ipipe_domain, p_link);
+	if (likely(test_bit(IPIPE_WIRED_FLAG, &next_domain->irqs[irq].control))) {
+		if (!m_ack && next_domain->irqs[irq].acknowledge != NULL)
+			next_domain->irqs[irq].acknowledge(irq);
+		if (likely(__ipipe_dispatch_wired(next_domain, irq))) {
+			goto finalize;
+		} else
+			goto finalize_nosync;
+	}
+
+	this_domain = ipipe_current_domain;
+
+	if (test_bit(IPIPE_STICKY_FLAG, &this_domain->irqs[irq].control))
+		head = &this_domain->p_link;
+
+	/* Ack the interrupt. */
+
+	pos = head;
+
+	while (pos != &__ipipe_pipeline) {
+		next_domain = list_entry(pos, struct ipipe_domain, p_link);
+
+		/*
+		 * For each domain handling the incoming IRQ, mark it
+		 * as pending in its log.
+		 */
+		if (test_bit(IPIPE_HANDLE_FLAG, &next_domain->irqs[irq].control)) {
+			/*
+			 * Domains that handle this IRQ are polled for
+			 * acknowledging it by decreasing priority
+			 * order. The interrupt must be made pending
+			 * _first_ in the domain's status flags before
+			 * the PIC is unlocked.
+			 */
+			__ipipe_set_irq_pending(next_domain, irq);
+
+			if (!m_ack && next_domain->irqs[irq].acknowledge != NULL)
+				m_ack = next_domain->irqs[irq].acknowledge(irq);
+		}
+
+		/*
+		 * If the domain does not want the IRQ to be passed
+		 * down the interrupt pipe, exit the loop now.
+		 */
+
+		if (!test_bit(IPIPE_PASS_FLAG, &next_domain->irqs[irq].control))
+			break;
+
+		pos = next_domain->p_link.next;
+	}
+
+finalize:
+
+	/* Given our deferred dispatching model for regular IRQs, we
+	 * only record CPU regs for the last timer interrupt, so that
+	 * the timer handler charges CPU times properly. It is assumed
+	 * that other interrupt handlers don't actually care for such
+	 * information. */
+
+	if (irq == __ipipe_tick_irq)
+		set_irq_regs(&regs);
+
+	/*
+	 * Now walk the pipeline, yielding control to the highest
+	 * priority domain that has pending interrupt(s) or
+	 * immediately to the current domain if the interrupt has been
+	 * marked as 'sticky'. This search does not go beyond the
+	 * current domain in the pipeline.
+	 */
+
+	__ipipe_walk_pipeline(head);
+
+finalize_nosync:
+
+	if (!ipipe_root_domain_p ||
+	    test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
+		return 0;
+
+#ifdef CONFIG_SMP
+	/*
+	 * Prevent a spurious rescheduling from being triggered on
+	 * preemptible kernels along the way out through
+	 * ret_from_intr.
+	 */
+	if ((long)regs.orig_eax < 0)
+		__set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status));
+#endif	/* CONFIG_SMP */
+
+	return 1;
+}
+
+int __ipipe_check_tickdev(const char *devname)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+	if (strcmp(devname, "lapic"))
+		return __ipipe_check_lapic();
+#endif
+
+	return 1;
+}
+
+EXPORT_SYMBOL(__ipipe_tick_irq);
+EXPORT_SYMBOL(ipipe_critical_enter);
+EXPORT_SYMBOL(ipipe_critical_exit);
+EXPORT_SYMBOL(ipipe_trigger_irq);
+EXPORT_SYMBOL(ipipe_get_sysinfo);
+
+EXPORT_SYMBOL_GPL(irq_desc);
+EXPORT_SYMBOL_GPL(__switch_to);
+EXPORT_SYMBOL_GPL(show_stack);
+EXPORT_PER_CPU_SYMBOL_GPL(init_tss);
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+EXPORT_SYMBOL(tasklist_lock);
+#endif /* CONFIG_SMP || CONFIG_DEBUG_SPINLOCK */
+#ifdef CONFIG_SMP
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_tlbstate);
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_IPIPE_TRACE_MCOUNT
+void notrace mcount(void);
+EXPORT_SYMBOL(mcount);
+#endif /* CONFIG_IPIPE_TRACE_MCOUNT */
Index: linux-2.6.23/arch/i386/kernel/mcount.S
===================================================================
--- /dev/null
+++ linux-2.6.23/arch/i386/kernel/mcount.S
@@ -0,0 +1,32 @@
+/*
+ *  linux/arch/i386/mcount.S
+ *
+ *  Copyright (C) 2005, 2007 Jan Kiszka
+ */
+
+.globl mcount
+mcount:
+	cmpl $0,ipipe_trace_enable
+	je out
+
+	pushl %ebp
+	movl %esp,%ebp
+
+	pushl %eax
+	pushl %ecx
+	pushl %edx
+
+	pushl $0		# no additional value (v)
+	movl (%ebp),%eax
+	movl 0x4(%ebp),%edx	# __CALLER_ADDR0
+	movl 0x4(%eax),%ecx	# __CALLER_ADDR1
+	movl $0,%eax		# IPIPE_TRACE_FUNC
+	call __ipipe_trace
+	popl %eax
+
+	popl %edx
+	popl %ecx
+	popl %eax
+	popl %ebp
+out:
+	ret
Index: linux-2.6.23/arch/i386/kernel/nmi.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/nmi.c
+++ linux-2.6.23/arch/i386/kernel/nmi.c
@@ -48,6 +48,10 @@ static unsigned int nmi_hz = HZ;
 
 static DEFINE_PER_CPU(short, wd_enabled);
 
+static int default_nmi_watchdog_tick(struct pt_regs * regs, unsigned reason);
+int (*nmi_watchdog_tick) (struct pt_regs * regs, unsigned reason) = &default_nmi_watchdog_tick;
+EXPORT_SYMBOL(nmi_watchdog_tick);
+
 /* local prototypes */
 static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
 
@@ -317,9 +321,7 @@ void touch_nmi_watchdog(void)
 }
 EXPORT_SYMBOL(touch_nmi_watchdog);
 
-extern void die_nmi(struct pt_regs *, const char *msg);
-
-__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
+static __kprobes int default_nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
 {
 
 	/*
Index: linux-2.6.23/arch/i386/kernel/process.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/process.c
+++ linux-2.6.23/arch/i386/kernel/process.c
@@ -197,6 +197,7 @@ void cpu_idle(void)
 				play_dead();
 
 			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
+ 			ipipe_suspend_domain();
 			idle();
 		}
 		tick_nohz_restart_sched_tick();
Index: linux-2.6.23/arch/i386/kernel/smp.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/smp.c
+++ linux-2.6.23/arch/i386/kernel/smp.c
@@ -142,6 +142,9 @@ void __send_IPI_shortcut(unsigned int sh
 	 * to the APIC.
 	 */
 	unsigned int cfg;
+	unsigned long flags;
+
+	local_irq_save_hw_cond(flags);
 
 	/*
 	 * Wait for idle.
@@ -157,6 +160,8 @@ void __send_IPI_shortcut(unsigned int sh
 	 * Send the IPI. The write to APIC_ICR fires this off.
 	 */
 	apic_write_around(APIC_ICR, cfg);
+
+	local_irq_restore_hw_cond(flags);
 }
 
 void fastcall send_IPI_self(int vector)
@@ -205,10 +210,10 @@ void send_IPI_mask_bitmask(cpumask_t cpu
 	unsigned long mask = cpus_addr(cpumask)[0];
 	unsigned long flags;
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
 	__send_IPI_dest_field(mask, vector);
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 void send_IPI_mask_sequence(cpumask_t mask, int vector)
@@ -222,14 +227,14 @@ void send_IPI_mask_sequence(cpumask_t ma
 	 * should be modified to do 1 message per cluster ID - mbligh
 	 */ 
 
-	local_irq_save(flags);
+	local_irq_save_hw(flags);
 	for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
 		if (cpu_isset(query_cpu, mask)) {
 			__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
 					      vector);
 		}
 	}
-	local_irq_restore(flags);
+	local_irq_restore_hw(flags);
 }
 
 #include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
@@ -312,7 +317,9 @@ void leave_mm(unsigned long cpu)
 
 fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
 {
-	unsigned long cpu;
+	unsigned long cpu, flags;
+
+  	local_irq_save_hw_cond(flags);
 
 	cpu = get_cpu();
 
@@ -342,6 +349,7 @@ fastcall void smp_invalidate_interrupt(s
 	smp_mb__after_clear_bit();
 out:
 	put_cpu_no_resched();
+  	local_irq_restore_hw_cond(flags);
 }
 
 void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
@@ -395,14 +403,17 @@ void flush_tlb_current_task(void)
 {
 	struct mm_struct *mm = current->mm;
 	cpumask_t cpu_mask;
+ 	unsigned long flags;
 
 	preempt_disable();
+	local_irq_save_hw_cond(flags);
 	cpu_mask = mm->cpu_vm_mask;
 	cpu_clear(smp_processor_id(), cpu_mask);
 
 	local_flush_tlb();
 	if (!cpus_empty(cpu_mask))
 		flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+ 	local_irq_restore_hw_cond(flags);
 	preempt_enable();
 }
 
@@ -430,8 +441,11 @@ void flush_tlb_page(struct vm_area_struc
 {
 	struct mm_struct *mm = vma->vm_mm;
 	cpumask_t cpu_mask;
+	unsigned long flags;
 
 	preempt_disable();
+	local_irq_save_hw_cond(flags);
+
 	cpu_mask = mm->cpu_vm_mask;
 	cpu_clear(smp_processor_id(), cpu_mask);
 
@@ -442,6 +456,8 @@ void flush_tlb_page(struct vm_area_struc
 		 	leave_mm(smp_processor_id());
 	}
 
+	local_irq_restore_hw_cond(flags);
+
 	if (!cpus_empty(cpu_mask))
 		flush_tlb_others(cpu_mask, mm, va);
 
@@ -603,7 +619,7 @@ native_smp_call_function_mask(cpumask_t 
 
 static void stop_this_cpu (void * dummy)
 {
-	local_irq_disable();
+	local_irq_disable_hw();
 	/*
 	 * Remove this CPU:
 	 */
Index: linux-2.6.23/arch/i386/kernel/smpboot.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/smpboot.c
+++ linux-2.6.23/arch/i386/kernel/smpboot.c
@@ -1306,6 +1306,11 @@ void __init smp_intr_init(void)
 
 	/* IPI for generic function call */
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+
+#ifdef CONFIG_IPIPE
+	/* IPI for critical lock */
+	set_intr_gate(IPIPE_CRITICAL_VECTOR, ipipe_ipiX);
+#endif
 }
 
 /*
Index: linux-2.6.23/arch/i386/kernel/time.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/time.c
+++ linux-2.6.23/arch/i386/kernel/time.c
@@ -165,11 +165,12 @@ irqreturn_t timer_interrupt(int irq, voi
 		 * This will also deassert NMI lines for the watchdog if run
 		 * on an 82489DX-based system.
 		 */
-		spin_lock(&i8259A_lock);
+		unsigned long flags;
+		spin_lock_irqsave_cond(&i8259A_lock,flags);
 		outb(0x0c, PIC_MASTER_OCW3);
 		/* Ack the IRQ; AEOI will end it automatically. */
 		inb(PIC_MASTER_POLL);
-		spin_unlock(&i8259A_lock);
+		spin_unlock_irqrestore_cond(&i8259A_lock,flags);
 	}
 #endif
 
Index: linux-2.6.23/arch/i386/kernel/traps.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/traps.c
+++ linux-2.6.23/arch/i386/kernel/traps.c
@@ -320,6 +320,9 @@ void show_registers(struct pt_regs *regs
 	printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
 		TASK_COMM_LEN, current->comm, current->pid,
 		current_thread_info(), current, task_thread_info(current));
+#ifdef CONFIG_IPIPE
+	printk(KERN_EMERG "\nI-pipe domain %s", ipipe_current_domain->name);
+#endif /* CONFIG_IPIPE */
 	/*
 	 * When in-kernel, we also print out the stack and code at the
 	 * time of the fault..
@@ -746,6 +749,8 @@ void __kprobes die_nmi(struct pt_regs *r
 	do_exit(SIGSEGV);
 }
 
+EXPORT_SYMBOL(die_nmi);
+
 static __kprobes void default_do_nmi(struct pt_regs * regs)
 {
 	unsigned char reason = 0;
@@ -788,17 +793,21 @@ static int ignore_nmis;
 
 fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
 {
-	int cpu;
+	int cpu, cs;
 
 	nmi_enter();
 
 	cpu = smp_processor_id();
 
+	cs = ipipe_disable_context_check(cpu);
+
 	++nmi_count(cpu);
 
 	if (!ignore_nmis)
 		default_do_nmi(regs);
 
+	ipipe_restore_context_check(cpu, cs);
+
 	nmi_exit();
 }
 
@@ -1092,13 +1101,16 @@ asmlinkage void math_state_restore(void)
 {
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = thread->task;
+	unsigned long flags;
 
+	local_irq_save_hw_cond(flags);
 	clts();		/* Allow maths ops (or we recurse) */
 	if (!tsk_used_math(tsk))
 		init_fpu(tsk);
 	restore_fpu(tsk);
 	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
 	tsk->fpu_counter++;
+	local_irq_restore_hw_cond(flags);
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
 
Index: linux-2.6.23/arch/i386/kernel/vm86.c
===================================================================
--- linux-2.6.23.orig/arch/i386/kernel/vm86.c
+++ linux-2.6.23/arch/i386/kernel/vm86.c
@@ -148,12 +148,14 @@ struct pt_regs * fastcall save_v86_state
 		do_exit(SIGSEGV);
 	}
 
+	local_irq_disable_hw_cond();
 	tss = &per_cpu(init_tss, get_cpu());
 	current->thread.esp0 = current->thread.saved_esp0;
 	current->thread.sysenter_cs = __KERNEL_CS;
 	load_esp0(tss, &current->thread);
 	current->thread.saved_esp0 = 0;
 	put_cpu();
+	local_irq_enable_hw_cond();
 
 	ret = KVM86->regs32;
 
@@ -324,12 +326,14 @@ static void do_sys_vm86(struct kernel_vm
 	tsk->thread.saved_fs = info->regs32->xfs;
 	savesegment(gs, tsk->thread.saved_gs);
 
+	local_irq_disable_hw_cond();
 	tss = &per_cpu(init_tss, get_cpu());
 	tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
 	if (cpu_has_sep)
 		tsk->thread.sysenter_cs = 0;
 	load_esp0(tss, &tsk->thread);
 	put_cpu();
+	local_irq_enable_hw_cond();
 
 	tsk->thread.screen_bitmap = info->screen_bitmap;
 	if (info->flags & VM86_SCREEN_BITMAP)
Index: linux-2.6.23/arch/i386/lib/mmx.c
===================================================================
--- linux-2.6.23.orig/arch/i386/lib/mmx.c
+++ linux-2.6.23/arch/i386/lib/mmx.c
@@ -31,7 +31,7 @@ void *_mmx_memcpy(void *to, const void *
 	void *p;
 	int i;
 
-	if (unlikely(in_interrupt()))
+	if (unlikely(!ipipe_root_domain_p || in_interrupt()))
 		return __memcpy(to, from, len);
 
 	p = to;
Index: linux-2.6.23/arch/i386/mach-visws/visws_apic.c
===================================================================
--- linux-2.6.23.orig/arch/i386/mach-visws/visws_apic.c
+++ linux-2.6.23/arch/i386/mach-visws/visws_apic.c
@@ -28,7 +28,7 @@
 #include "irq_vectors.h"
 
 
-static DEFINE_SPINLOCK(cobalt_lock);
+static IPIPE_DEFINE_SPINLOCK(cobalt_lock);
 
 /*
  * Set the given Cobalt APIC Redirection Table entry to point
Index: linux-2.6.23/arch/i386/mach-voyager/voyager_basic.c
===================================================================
--- linux-2.6.23.orig/arch/i386/mach-voyager/voyager_basic.c
+++ linux-2.6.23/arch/i386/mach-voyager/voyager_basic.c
@@ -185,20 +185,20 @@ voyager_timer_interrupt(void)
 		 * pointy.  */
 		__u16 val;
 
-		spin_lock(&i8253_lock);
+		spin_lock_irqsave(&i8253_lock);
 		
 		outb_p(0x00, 0x43);
 		val = inb_p(0x40);
 		val |= inb(0x40) << 8;
-		spin_unlock(&i8253_lock);
+		spin_unlock_irqrestore(&i8253_lock);
 
 		if(val > LATCH) {
 			printk("\nVOYAGER: countdown timer value too high (%d), resetting\n\n", val);
-			spin_lock(&i8253_lock);
+			spin_lock_irqsave(&i8253_lock);
 			outb(0x34,0x43);
 			outb_p(LATCH & 0xff , 0x40);	/* LSB */
 			outb(LATCH >> 8 , 0x40);	/* MSB */
-			spin_unlock(&i8253_lock);
+			spin_unlock_irqrestore(&i8253_lock);
 		}
 	}
 #ifdef CONFIG_SMP
Index: linux-2.6.23/arch/i386/mm/fault.c
===================================================================
--- linux-2.6.23.orig/arch/i386/mm/fault.c
+++ linux-2.6.23/arch/i386/mm/fault.c
@@ -311,6 +311,8 @@ fastcall void __kprobes do_page_fault(st
 	/* get the address */
         address = read_cr2();
 
+	local_irq_enable_hw_cond();
+
 	tsk = current;
 
 	si_code = SEGV_MAPERR;
@@ -655,3 +657,22 @@ void vmalloc_sync_all(void)
 			start = address + PGDIR_SIZE;
 	}
 }
+
+#ifdef CONFIG_IPIPE
+void __ipipe_pin_range_globally(unsigned long start, unsigned long end)
+{
+	unsigned long next, addr = start;
+
+	do {
+		unsigned long flags;
+		struct page *page;
+
+		next = pgd_addr_end(addr, end);
+		spin_lock_irqsave(&pgd_lock, flags);
+		for (page = pgd_list; page; page = (struct page *)page->index)
+			vmalloc_sync_one(page_address(page), addr);
+		spin_unlock_irqrestore(&pgd_lock, flags);
+
+	} while (addr = next, addr != end);
+}
+#endif /* CONFIG_IPIPE */
Index: linux-2.6.23/drivers/pci/htirq.c
===================================================================
--- linux-2.6.23.orig/drivers/pci/htirq.c
+++ linux-2.6.23/drivers/pci/htirq.c
@@ -21,7 +21,7 @@
  * With multiple simultaneous hypertransport irq devices it might pay
  * to make this more fine grained.  But start with simple, stupid, and correct.
  */
-static DEFINE_SPINLOCK(ht_irq_lock);
+static IPIPE_DEFINE_SPINLOCK(ht_irq_lock);
 
 struct ht_irq_cfg {
 	struct pci_dev *dev;
Index: linux-2.6.23/include/asm-i386/apic.h
===================================================================
--- linux-2.6.23.orig/include/asm-i386/apic.h
+++ linux-2.6.23/include/asm-i386/apic.h
@@ -79,7 +79,13 @@ int get_physical_broadcast(void);
 # define apic_write_around(x,y) apic_write_atomic((x),(y))
 #endif
 
+#ifdef CONFIG_IPIPE
+#define ack_APIC_irq() do { } while(0)
+static inline void __ack_APIC_irq(void)
+#else /* !CONFIG_IPIPE */
+#define __ack_APIC_irq() ack_APIC_irq()
 static inline void ack_APIC_irq(void)
+#endif /* CONFIG_IPIPE */
 {
 	/*
 	 * ack_APIC_irq() actually gets compiled as a single instruction:
Index: linux-2.6.23/include/asm-i386/hw_irq.h
===================================================================
--- linux-2.6.23.orig/include/asm-i386/hw_irq.h
+++ linux-2.6.23/include/asm-i386/hw_irq.h
@@ -40,6 +40,13 @@ fastcall void error_interrupt(void);
 fastcall void spurious_interrupt(void);
 fastcall void thermal_interrupt(void);
 #define platform_legacy_irq(irq)	((irq) < 16)
+#ifdef CONFIG_IPIPE
+fastcall void ipipe_ipi0(void);
+fastcall void ipipe_ipi1(void);
+fastcall void ipipe_ipi2(void);
+fastcall void ipipe_ipi3(void);
+fastcall void ipipe_ipiX(void);
+#endif
 #endif
 
 void disable_8259A_irq(unsigned int irq);
Index: linux-2.6.23/include/asm-i386/i8253.h
===================================================================
--- linux-2.6.23.orig/include/asm-i386/i8253.h
+++ linux-2.6.23/include/asm-i386/i8253.h
@@ -8,7 +8,7 @@
 #define PIT_CH0			0x40
 #define PIT_CH2			0x42
 
-extern spinlock_t i8253_lock;
+extern ipipe_spinlock_t i8253_lock;
 
 extern struct clock_event_device *global_clock_event;
 
Index: linux-2.6.23/include/asm-i386/i8259.h
===================================================================
--- linux-2.6.23.orig/include/asm-i386/i8259.h
+++ linux-2.6.23/include/asm-i386/i8259.h
@@ -7,7 +7,7 @@ extern unsigned int cached_irq_mask;
 #define cached_master_mask	(__byte(0, cached_irq_mask))
 #define cached_slave_mask	(__byte(1, cached_irq_mask))
 
-extern spinlock_t i8259A_lock;
+extern ipipe_spinlock_t i8259A_lock;
 
 extern void init_8259A(int auto_eoi);
 extern void enable_8259A_irq(unsigned int irq);
Index: linux-2.6.23/include/asm-i386/ipipe.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/asm-i386/ipipe.h
@@ -0,0 +1,205 @@
+/*   -*- linux-c -*-
+ *   include/asm-i386/ipipe.h
+ *
+ *   Copyright (C) 2002-2005 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __I386_IPIPE_H
+#define __I386_IPIPE_H
+
+#ifdef CONFIG_IPIPE
+
+#define IPIPE_ARCH_STRING	"1.10-07"
+#define IPIPE_MAJOR_NUMBER	1
+#define IPIPE_MINOR_NUMBER	10
+#define IPIPE_PATCH_NUMBER	7
+
+#ifndef __ASSEMBLY__
+
+#include <linux/cpumask.h>
+#include <linux/list.h>
+#include <linux/threads.h>
+#include <linux/ipipe_percpu.h>
+#include <asm/ptrace.h>
+
+#define ipipe_processor_id()   raw_smp_processor_id()
+
+#define prepare_arch_switch(next)		\
+do {						\
+	ipipe_schedule_notify(current, next);	\
+	local_irq_disable_hw();			\
+} while(0)
+
+#define task_hijacked(p)						\
+	({ int x = !ipipe_root_domain_p; \
+	__clear_bit(IPIPE_SYNC_FLAG, &ipipe_root_cpudom_var(status));	\
+	local_irq_enable_hw(); x; })
+
+struct ipipe_domain;
+
+struct ipipe_sysinfo {
+
+	int ncpus;		/* Number of CPUs on board */
+	u64 cpufreq;		/* CPU frequency (in Hz) */
+
+	/* Arch-dependent block */
+
+	struct {
+		unsigned tmirq;	/* Timer tick IRQ */
+		u64 tmfreq;	/* Timer frequency */
+	} archdep;
+};
+
+#define ipipe_read_tsc(t)  __asm__ __volatile__("rdtsc" : "=A" (t))
+#define ipipe_cpu_freq() ({ unsigned long long __freq = cpu_has_tsc?(1000LL * cpu_khz):CLOCK_TICK_RATE; __freq; })
+
+#define ipipe_tsc2ns(t) \
+({ \
+	unsigned long long delta = (t)*1000; \
+	do_div(delta, cpu_khz/1000+1); \
+	(unsigned long)delta; \
+})
+
+#define ipipe_tsc2us(t) \
+({ \
+    unsigned long long delta = (t); \
+    do_div(delta, cpu_khz/1000+1); \
+    (unsigned long)delta; \
+})
+
+/* Private interface -- Internal use only */
+
+#define __ipipe_check_platform()	do { } while(0)
+#define __ipipe_init_platform()		do { } while(0)
+#define __ipipe_enable_irq(irq)		irq_desc[irq].chip->enable(irq)
+#define __ipipe_disable_irq(irq)	irq_desc[irq].chip->disable(irq)
+
+#ifdef CONFIG_SMP
+void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd);
+#else
+#define __ipipe_hook_critical_ipi(ipd) do { } while(0)
+#endif
+
+#define __ipipe_disable_irqdesc(ipd, irq)	do { } while(0)
+
+void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq);
+
+void __ipipe_enable_pipeline(void);
+
+int __ipipe_handle_irq(struct pt_regs regs);
+
+void __ipipe_do_critical_sync(unsigned irq, void *cookie);
+
+extern int __ipipe_tick_irq;
+
+#define __ipipe_call_root_xirq_handler(ipd,irq) \
+   __asm__ __volatile__ ("pushfl\n\t" \
+                         "pushl %%cs\n\t" \
+                         "pushl $1f\n\t" \
+	                 "pushl %%eax\n\t" \
+                         "pushl %%fs\n\t" \
+	                 "pushl %%es\n\t" \
+	                 "pushl %%ds\n\t" \
+	                 "pushl %%eax\n\t" \
+	                 "pushl %%ebp\n\t" \
+	                 "pushl %%edi\n\t" \
+	                 "pushl %%esi\n\t" \
+	                 "pushl %%edx\n\t" \
+	                 "pushl %%ecx\n\t" \
+	                 "pushl %%ebx\n\t" \
+                         "movl  %%esp,%%eax\n\t" \
+                         "call *%1\n\t" \
+	                 "jmp ret_from_intr\n\t" \
+	                 "1: cli\n" \
+			 : /* no output */ \
+			 : "a" (~irq), "m" ((ipd)->irqs[irq].handler))
+
+#define __ipipe_call_root_virq_handler(ipd,irq) \
+   __asm__ __volatile__ ("pushfl\n\t" \
+                         "pushl %%cs\n\t" \
+                         "pushl $__virq_end\n\t" \
+	                 "pushl $-1\n\t" \
+                         "pushl %%fs\n\t" \
+	                 "pushl %%es\n\t" \
+	                 "pushl %%ds\n\t" \
+	                 "pushl %%eax\n\t" \
+	                 "pushl %%ebp\n\t" \
+	                 "pushl %%edi\n\t" \
+	                 "pushl %%esi\n\t" \
+	                 "pushl %%edx\n\t" \
+	                 "pushl %%ecx\n\t" \
+	                 "pushl %%ebx\n\t" \
+			 "pushl %2\n\t" \
+                         "pushl %%eax\n\t" \
+                         "call *%1\n\t" \
+			 "addl $8,%%esp\n\t" \
+			 : /* no output */ \
+			 : "a" (irq), "m" ((ipd)->irqs[irq].handler), "r" ((ipd)->irqs[irq].cookie))
+
+#define __ipipe_finalize_root_virq_handler() \
+   __asm__ __volatile__ ("jmp ret_from_intr\n\t" \
+	                 "__virq_end: cli\n" \
+			 : /* no output */ \
+			 : /* no input */)
+
+static inline unsigned long __ipipe_ffnz(unsigned long ul)
+{
+      __asm__("bsrl %1, %0":"=r"(ul)
+      :	"r"(ul));
+	return ul;
+}
+
+/* When running handlers, enable hw interrupts for all domains but the
+ * one heading the pipeline, so that IRQs can never be significantly
+ * deferred for the latter. */
+#define __ipipe_run_isr(ipd, irq) \
+do { \
+	local_irq_enable_nohead(ipd);				 \
+	if (ipd == ipipe_root_domain) {				 \
+		if (likely(!ipipe_virtual_irq_p(irq))) {	 \
+			__ipipe_call_root_xirq_handler(ipd,irq); \
+		} else {					 \
+			irq_enter();				 \
+			__ipipe_call_root_virq_handler(ipd,irq); \
+			irq_exit();				 \
+			__ipipe_finalize_root_virq_handler();	 \
+		}						\
+	} else {						\
+		__clear_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \
+		ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie);	\
+		__set_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \
+	}							\
+	local_irq_disable_nohead(ipd);				\
+} while(0)
+
+#endif /* __ASSEMBLY__ */
+
+#define __ipipe_syscall_watched_p(p, sc)	\
+	(((p)->flags & PF_EVNOTIFY) || (unsigned long)sc >= NR_syscalls)
+
+int __ipipe_check_lapic(void);
+
+int __ipipe_check_tickdev(const char *devname);
+
+#else /* !CONFIG_IPIPE */
+
+#define task_hijacked(p)	0
+
+#endif /* CONFIG_IPIPE */
+
+#endif	/* !__I386_IPIPE_H */
Index: linux-2.6.23/include/asm-i386/ipipe_base.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/asm-i386/ipipe_base.h
@@ -0,0 +1,182 @@
+/* -*- linux-c -*-
+ * include/asm-i386/ipipe_base.h
+ *
+ * Copyright (C) 2007 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __I386_IPIPE_BASE_H
+#define __I386_IPIPE_BASE_H
+
+#include <linux/threads.h>
+#include <irq_vectors.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+/* System interrupts are mapped beyond the last defined external IRQ
+ * number. */
+#define IPIPE_FIRST_APIC_IRQ	NR_IRQS
+#define IPIPE_NR_XIRQS		(NR_IRQS + 256 - FIRST_SYSTEM_VECTOR)
+#define ipipe_apic_irq_vector(irq)  ((irq) - IPIPE_FIRST_APIC_IRQ + FIRST_SYSTEM_VECTOR)
+#define ipipe_apic_vector_irq(vec)  ((vec) - FIRST_SYSTEM_VECTOR + IPIPE_FIRST_APIC_IRQ)
+/* If the APIC is enabled, then we expose four service vectors in the
+ * APIC space which are freely available to domains. */
+#define IPIPE_SERVICE_VECTOR0	0xf5
+#define IPIPE_SERVICE_IPI0	ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0)
+#define IPIPE_SERVICE_VECTOR1	0xf6
+#define IPIPE_SERVICE_IPI1	ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1)
+#define IPIPE_SERVICE_VECTOR2	0xf7
+#define IPIPE_SERVICE_IPI2	ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2)
+#define IPIPE_SERVICE_VECTOR3	0xf8
+#define IPIPE_SERVICE_IPI3	ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3)
+#define IPIPE_CRITICAL_VECTOR  0xf9	/* SMP-only: used by ipipe_critical_enter/exit() */
+#define IPIPE_CRITICAL_IPI     ipipe_apic_vector_irq(IPIPE_CRITICAL_VECTOR)
+#else	/* !CONFIG_X86_LOCAL_APIC */
+#define IPIPE_NR_XIRQS		NR_IRQS
+#endif	/* !CONFIG_X86_LOCAL_APIC */
+
+#define IPIPE_IRQ_ISHIFT  	5	/* 2^5 for 32bits arch. */
+
+#define ex_do_divide_error		0
+#define ex_do_debug			1
+/* NMI not pipelined. */
+#define ex_do_int3			3
+#define ex_do_overflow			4
+#define ex_do_bounds			5
+#define ex_do_invalid_op		6
+#define ex_device_not_available		7
+/* Double fault not pipelined. */
+#define ex_do_coprocessor_segment_overrun 9
+#define ex_do_invalid_TSS		10
+#define ex_do_segment_not_present	11
+#define ex_do_stack_segment		12
+#define ex_do_general_protection	13
+#define ex_do_page_fault		14
+#define ex_do_spurious_interrupt_bug	15
+#define ex_do_coprocessor_error		16
+#define ex_do_alignment_check		17
+#define ex_machine_check_vector		18
+#define ex_do_simd_coprocessor_error	19
+#define ex_do_iret_error		32
+
+/* IDT fault vectors */
+#define IPIPE_NR_FAULTS		33 /* 32 from IDT + iret_error */
+/* Pseudo-vectors used for kernel events */
+#define IPIPE_FIRST_EVENT	IPIPE_NR_FAULTS
+#define IPIPE_EVENT_SYSCALL	(IPIPE_FIRST_EVENT)
+#define IPIPE_EVENT_SCHEDULE	(IPIPE_FIRST_EVENT + 1)
+#define IPIPE_EVENT_SIGWAKE	(IPIPE_FIRST_EVENT + 2)
+#define IPIPE_EVENT_SETSCHED	(IPIPE_FIRST_EVENT + 3)
+#define IPIPE_EVENT_INIT	(IPIPE_FIRST_EVENT + 4)
+#define IPIPE_EVENT_EXIT	(IPIPE_FIRST_EVENT + 5)
+#define IPIPE_EVENT_CLEANUP	(IPIPE_FIRST_EVENT + 6)
+#define IPIPE_LAST_EVENT	IPIPE_EVENT_CLEANUP
+#define IPIPE_NR_EVENTS		(IPIPE_LAST_EVENT + 1)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/alternative.h>
+
+#ifdef CONFIG_SMP
+
+#define GET_ROOT_STATUS_ADDR					\
+	"pushfl; cli;"						\
+	"movl %%fs:per_cpu__this_cpu_off, %%eax;"		\
+	"lea per_cpu__ipipe_percpu_darray(%%eax), %%eax;"
+#define PUT_ROOT_STATUS_ADDR	"popfl;"
+
+static inline void __ipipe_stall_root(void)
+{
+	__asm__ __volatile__(GET_ROOT_STATUS_ADDR
+			     LOCK_PREFIX
+			     "btsl $0,(%%eax);"
+			     PUT_ROOT_STATUS_ADDR
+			     : : : "eax", "memory");
+}
+
+static inline unsigned long __ipipe_test_and_stall_root(void)
+{
+	int oldbit;
+
+	__asm__ __volatile__(GET_ROOT_STATUS_ADDR
+			     LOCK_PREFIX
+			     "btsl $0,(%%eax);"
+			     "sbbl %0,%0;"
+			     PUT_ROOT_STATUS_ADDR
+			     :"=r" (oldbit)
+			     : : "eax", "memory");
+	return oldbit;
+}
+
+static inline unsigned long __ipipe_test_root(void)
+{
+	int oldbit;
+
+	__asm__ __volatile__(GET_ROOT_STATUS_ADDR
+			     "btl $0,(%%eax);"
+			     "sbbl %0,%0;"
+			     PUT_ROOT_STATUS_ADDR
+			     :"=r" (oldbit)
+			     : : "eax");
+	return oldbit;
+}
+
+#else /* ! CONFIG_SMP */
+
+#if __GNUC__ >= 4
+/* Alias to ipipe_root_cpudom_var(status) */
+extern unsigned long __ipipe_root_status;
+#else
+extern unsigned long *const __ipipe_root_status_addr;
+#define __ipipe_root_status	(*__ipipe_root_status_addr)
+#endif
+
+static inline void __ipipe_stall_root(void)
+{
+	volatile unsigned long *p = &__ipipe_root_status;
+	__asm__ __volatile__("btsl $0,%0;"
+			     :"+m" (*p) : : "memory");
+}
+
+static inline unsigned long __ipipe_test_and_stall_root(void)
+{
+	volatile unsigned long *p = &__ipipe_root_status;
+	int oldbit;
+
+	__asm__ __volatile__("btsl $0,%1;"
+			     "sbbl %0,%0;"
+			     :"=r" (oldbit), "+m" (*p)
+			     : : "memory");
+	return oldbit;
+}
+
+static inline unsigned long __ipipe_test_root(void)
+{
+	volatile unsigned long *p = &__ipipe_root_status;
+	int oldbit;
+
+	__asm__ __volatile__("btl $0,%1;"
+			     "sbbl %0,%0;"
+			     :"=r" (oldbit)
+			     :"m" (*p));
+	return oldbit;
+}
+
+#endif	/* CONFIG_SMP */
+
+#endif /* !__ASSEMBLY__ */
+
+#endif	/* !__I386_IPIPE_BASE_H */
Index: linux-2.6.23/include/asm-i386/irqflags.h
===================================================================
--- linux-2.6.23.orig/include/asm-i386/irqflags.h
+++ linux-2.6.23/include/asm-i386/irqflags.h
@@ -12,32 +12,62 @@
 #include <asm/processor-flags.h>
 
 #ifndef __ASSEMBLY__
+
+#include <linux/ipipe_trace.h>
+
 static inline unsigned long native_save_fl(void)
 {
 	unsigned long f;
+#ifdef CONFIG_IPIPE
+	f = (!__ipipe_test_root()) << 9;
+	barrier();
+#else
 	asm volatile("pushfl ; popl %0":"=g" (f): /* no input */);
+#endif
 	return f;
 }
 
 static inline void native_restore_fl(unsigned long f)
 {
+#ifdef CONFIG_IPIPE
+	barrier();
+	__ipipe_restore_root(!(f & X86_EFLAGS_IF));
+#else
 	asm volatile("pushl %0 ; popfl": /* no output */
 			     :"g" (f)
 			     :"memory", "cc");
+#endif
 }
 
 static inline void native_irq_disable(void)
 {
+#ifdef CONFIG_IPIPE
+	ipipe_check_context(ipipe_root_domain);
+	__ipipe_stall_root();
+	barrier();
+#else
 	asm volatile("cli": : :"memory");
+#endif
 }
 
 static inline void native_irq_enable(void)
 {
+#ifdef CONFIG_IPIPE
+	barrier();
+	__ipipe_unstall_root();
+#else
 	asm volatile("sti": : :"memory");
+#endif
 }
 
 static inline void native_safe_halt(void)
 {
+#ifdef CONFIG_IPIPE
+	__ipipe_unstall_root();
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+	ipipe_trace_end(0x8000000E);
+#endif
+#endif
 	asm volatile("sti; hlt": : :"memory");
 }
 
@@ -95,16 +125,31 @@ static inline void halt(void)
  */
 static inline unsigned long __raw_local_irq_save(void)
 {
+#ifdef CONFIG_IPIPE
+	unsigned long flags = (!__ipipe_test_and_stall_root()) << 9;
+	barrier();
+#else
 	unsigned long flags = __raw_local_save_flags();
-
 	raw_local_irq_disable();
-
+#endif
 	return flags;
 }
 
 #else
+
+#ifdef CONFIG_IPIPE
+#define DISABLE_INTERRUPTS(clobbers)	PER_CPU(ipipe_percpu_darray, %eax); btsl $0,(%eax); sti
+#define ENABLE_INTERRUPTS(clobbers)	call __ipipe_unstall_root
+#define ENABLE_INTERRUPTS_HW_COND	sti
+#define DISABLE_INTERRUPTS_HW(clobbers)	cli
+#define ENABLE_INTERRUPTS_HW(clobbers)	sti
+#else /* !CONFIG_IPIPE */
 #define DISABLE_INTERRUPTS(clobbers)	cli
 #define ENABLE_INTERRUPTS(clobbers)	sti
+#define ENABLE_INTERRUPTS_HW_COND
+#define DISABLE_INTERRUPTS_HW(clobbers)	DISABLE_INTERRUPTS(clobbers)
+#define ENABLE_INTERRUPTS_HW(clobbers)	ENABLE_INTERRUPTS(clobbers)
+#endif /* !CONFIG_IPIPE */
 #define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
 #define INTERRUPT_RETURN		iret
 #define GET_CR0_INTO_EAX		movl %cr0, %eax
@@ -115,8 +160,10 @@ static inline unsigned long __raw_local_
 #define raw_local_save_flags(flags) \
 		do { (flags) = __raw_local_save_flags(); } while (0)
 
-#define raw_local_irq_save(flags) \
-		do { (flags) = __raw_local_irq_save(); } while (0)
+#define raw_local_irq_save(flags) do {			\
+		ipipe_check_context(ipipe_root_domain);	\
+		(flags) = __raw_local_irq_save();	\
+	} while (0)
 
 static inline int raw_irqs_disabled_flags(unsigned long flags)
 {
@@ -129,6 +176,70 @@ static inline int raw_irqs_disabled(void
 
 	return raw_irqs_disabled_flags(flags);
 }
+
+static inline unsigned long raw_mangle_irq_bits(int virt, unsigned long real)
+{
+	/* Merge virtual and real interrupt mask bits into a single
+	   32bit word. */
+	return (real & ~(1 << 31)) | ((virt != 0) << 31);
+}
+
+static inline int raw_demangle_irq_bits(unsigned long *x)
+{
+	int virt = (*x & (1 << 31)) != 0;
+	*x &= ~(1L << 31);
+	return virt;
+}
+
+#define local_irq_disable_hw_notrace() \
+	__asm__ __volatile__("cli": : :"memory")
+#define local_irq_enable_hw_notrace() \
+	__asm__ __volatile__("sti": : :"memory")
+#define local_irq_save_hw_notrace(x) \
+	__asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
+#define local_irq_restore_hw_notrace(x) \
+	__asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc")
+
+#define local_save_flags_hw(x)	__asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */)
+#define irqs_disabled_hw()		\
+    ({					\
+	unsigned long x;		\
+	local_save_flags_hw(x);		\
+	raw_irqs_disabled_flags(x);	\
+    })
+
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+#define local_irq_disable_hw() do {			\
+		if (!irqs_disabled_hw()) {		\
+			local_irq_disable_hw_notrace();	\
+			ipipe_trace_begin(0x80000000);	\
+		}					\
+	} while (0)
+#define local_irq_enable_hw() do {			\
+		if (irqs_disabled_hw()) {		\
+			ipipe_trace_end(0x80000000);	\
+			local_irq_enable_hw_notrace();	\
+		}					\
+	} while (0)
+#define local_irq_save_hw(x) do {			\
+		local_save_flags_hw(x);			\
+		if (!raw_irqs_disabled_flags(x)) {	\
+			local_irq_disable_hw_notrace();	\
+			ipipe_trace_begin(0x80000001);	\
+		}					\
+	} while (0)
+#define local_irq_restore_hw(x) do {			\
+		if (!raw_irqs_disabled_flags(x))	\
+			ipipe_trace_end(0x80000001);	\
+		local_irq_restore_hw_notrace(x);	\
+	} while (0)
+#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */
+#define local_irq_save_hw(x)	local_irq_save_hw_notrace(x)
+#define local_irq_restore_hw(x)	local_irq_restore_hw_notrace(x)
+#define local_irq_enable_hw()	local_irq_enable_hw_notrace()
+#define local_irq_disable_hw()	local_irq_disable_hw_notrace()
+#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */
+
 #endif /* __ASSEMBLY__ */
 
 /*
Index: linux-2.6.23/include/asm-i386/mmu_context.h
===================================================================
--- linux-2.6.23.orig/include/asm-i386/mmu_context.h
+++ linux-2.6.23/include/asm-i386/mmu_context.h
@@ -79,8 +79,11 @@ static inline void switch_mm(struct mm_s
 
 #define activate_mm(prev, next)				\
 	do {						\
+		unsigned long flags;			\
 		paravirt_activate_mm(prev, next);	\
+		local_irq_save_hw_cond(flags);		\
 		switch_mm((prev),(next),NULL);		\
+		local_irq_restore_hw_cond(flags);	\
 	} while(0);
 
 #endif
Index: linux-2.6.23/include/asm-i386/nmi.h
===================================================================
--- linux-2.6.23.orig/include/asm-i386/nmi.h
+++ linux-2.6.23/include/asm-i386/nmi.h
@@ -29,7 +29,8 @@ extern void setup_apic_nmi_watchdog (voi
 extern void stop_apic_nmi_watchdog (void *);
 extern void disable_timer_nmi_watchdog(void);
 extern void enable_timer_nmi_watchdog(void);
-extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
+extern int (*nmi_watchdog_tick)(struct pt_regs * regs, unsigned reason);
+extern void die_nmi(struct pt_regs *, const char *msg);
 
 extern atomic_t nmi_active;
 extern unsigned int nmi_watchdog;
Index: linux-2.6.23/include/asm-i386/spinlock.h
===================================================================
--- linux-2.6.23.orig/include/asm-i386/spinlock.h
+++ linux-2.6.23/include/asm-i386/spinlock.h
@@ -54,7 +54,7 @@ static inline void __raw_spin_lock(raw_s
  * NOTE: there's an irqs-on section here, which normally would have to be
  * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use this variant.
  */
-#ifndef CONFIG_PROVE_LOCKING
+#if !defined(CONFIG_PROVE_LOCKING) && !defined(CONFIG_IPIPE)
 static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
 {
 	asm volatile(
Index: linux-2.6.23/include/linux/clockchips.h
===================================================================
--- linux-2.6.23.orig/include/linux/clockchips.h
+++ linux-2.6.23/include/linux/clockchips.h
@@ -91,6 +91,7 @@ struct clock_event_device {
 	struct list_head	list;
 	enum clock_event_mode	mode;
 	ktime_t			next_event;
+	int64_t			delta;
 };
 
 /*
Index: linux-2.6.23/include/linux/hardirq.h
===================================================================
--- linux-2.6.23.orig/include/linux/hardirq.h
+++ linux-2.6.23/include/linux/hardirq.h
@@ -146,7 +146,7 @@ extern void irq_enter(void);
  */
 extern void irq_exit(void);
 
-#define nmi_enter()		do { lockdep_off(); __irq_enter(); } while (0)
-#define nmi_exit()		do { __irq_exit(); lockdep_on(); } while (0)
+#define nmi_enter()		do { if (ipipe_root_domain_p) { lockdep_off(); __irq_enter(); } } while (0)
+#define nmi_exit()		do { if (ipipe_root_domain_p) { __irq_exit(); lockdep_on(); } } while (0)
 
 #endif /* LINUX_HARDIRQ_H */
Index: linux-2.6.23/include/linux/ipipe.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/linux/ipipe.h
@@ -0,0 +1,552 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe.h
+ *
+ * Copyright (C) 2002-2007 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_H
+#define __LINUX_IPIPE_H
+
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+#include <linux/linkage.h>
+#include <linux/ipipe_base.h>
+#include <linux/ipipe_compat.h>
+#include <asm/ipipe.h>
+
+#ifdef CONFIG_IPIPE
+
+#define IPIPE_VERSION_STRING	IPIPE_ARCH_STRING
+#define IPIPE_RELEASE_NUMBER	((IPIPE_MAJOR_NUMBER << 16) | \
+				 (IPIPE_MINOR_NUMBER <<  8) | \
+				 (IPIPE_PATCH_NUMBER))
+
+#ifndef BROKEN_BUILTIN_RETURN_ADDRESS
+#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_address(0))
+#define __BUILTIN_RETURN_ADDRESS1 ((unsigned long)__builtin_return_address(1))
+#endif /* !BUILTIN_RETURN_ADDRESS */
+
+#define IPIPE_ROOT_PRIO		100
+#define IPIPE_ROOT_ID		0
+#define IPIPE_ROOT_NPTDKEYS	4	/* Must be <= BITS_PER_LONG */
+
+#define IPIPE_RESET_TIMER	0x1
+#define IPIPE_GRAB_TIMER	0x2
+
+/* Global domain flags */
+#define IPIPE_SPRINTK_FLAG	0	/* Synchronous printk() allowed */
+#define IPIPE_AHEAD_FLAG	1	/* Domain always heads the pipeline */
+
+/* Interrupt control bits */
+#define IPIPE_HANDLE_FLAG	0
+#define IPIPE_PASS_FLAG		1
+#define IPIPE_ENABLE_FLAG	2
+#define IPIPE_DYNAMIC_FLAG	IPIPE_HANDLE_FLAG
+#define IPIPE_STICKY_FLAG	3
+#define IPIPE_SYSTEM_FLAG	4
+#define IPIPE_LOCK_FLAG		5
+#define IPIPE_WIRED_FLAG	6
+#define IPIPE_EXCLUSIVE_FLAG	7
+
+#define IPIPE_HANDLE_MASK	(1 << IPIPE_HANDLE_FLAG)
+#define IPIPE_PASS_MASK		(1 << IPIPE_PASS_FLAG)
+#define IPIPE_ENABLE_MASK	(1 << IPIPE_ENABLE_FLAG)
+#define IPIPE_DYNAMIC_MASK	IPIPE_HANDLE_MASK
+#define IPIPE_STICKY_MASK	(1 << IPIPE_STICKY_FLAG)
+#define IPIPE_SYSTEM_MASK	(1 << IPIPE_SYSTEM_FLAG)
+#define IPIPE_LOCK_MASK		(1 << IPIPE_LOCK_FLAG)
+#define IPIPE_WIRED_MASK	(1 << IPIPE_WIRED_FLAG)
+#define IPIPE_EXCLUSIVE_MASK	(1 << IPIPE_EXCLUSIVE_FLAG)
+
+#define IPIPE_DEFAULT_MASK	(IPIPE_HANDLE_MASK|IPIPE_PASS_MASK)
+#define IPIPE_STDROOT_MASK	(IPIPE_HANDLE_MASK|IPIPE_PASS_MASK|IPIPE_SYSTEM_MASK)
+
+#define IPIPE_EVENT_SELF        0x80000000
+
+#define IPIPE_NR_CPUS		NR_CPUS
+
+#define ipipe_current_domain	ipipe_cpu_var(ipipe_percpu_domain)
+
+#define ipipe_virtual_irq_p(irq)	((irq) >= IPIPE_VIRQ_BASE && \
+					 (irq) < IPIPE_NR_IRQS)
+
+typedef void (*ipipe_irq_handler_t)(unsigned irq,
+				    void *cookie);
+
+#define IPIPE_SAME_HANDLER	((ipipe_irq_handler_t)(-1))
+
+typedef int (*ipipe_irq_ackfn_t)(unsigned irq);
+
+typedef int (*ipipe_event_handler_t)(unsigned event,
+				     struct ipipe_domain *from,
+				     void *data);
+struct ipipe_domain {
+
+	int slot;			/* Slot number in percpu domain data array. */
+	struct list_head p_link;	/* Link in pipeline */
+	ipipe_event_handler_t evhand[IPIPE_NR_EVENTS]; /* Event handlers. */
+	unsigned long long evself;	/* Self-monitored event bits. */
+
+	struct {
+		unsigned long control;
+		ipipe_irq_ackfn_t acknowledge;
+		ipipe_irq_handler_t handler;
+		void *cookie;
+	} ____cacheline_aligned irqs[IPIPE_NR_IRQS];
+
+	int priority;
+	void *pdd;
+	unsigned long flags;
+	unsigned domid;
+	const char *name;
+	struct mutex mutex;
+};
+
+#define IPIPE_HEAD_PRIORITY	(-1) /* For domains always heading the pipeline */
+
+struct ipipe_domain_attr {
+
+	unsigned domid;		/* Domain identifier -- Magic value set by caller */
+	const char *name;	/* Domain name -- Warning: won't be dup'ed! */
+	int priority;		/* Priority in interrupt pipeline */
+	void (*entry) (void);	/* Domain entry point */
+	void *pdd;		/* Per-domain (opaque) data pointer */
+};
+
+#ifdef CONFIG_SMP
+/* These ops must start and complete on the same CPU: care for
+ * migration. */
+#define set_bit_safe(b, a)						\
+		({ unsigned long __flags;				\
+		local_irq_save_hw_notrace(__flags);			\
+		__set_bit(b, a);					\
+		local_irq_restore_hw_notrace(__flags); })
+#define test_and_set_bit_safe(b, a)					\
+		({ unsigned long __flags, __x;				\
+		local_irq_save_hw_notrace(__flags);			\
+		__x = __test_and_set_bit(b, a);				\
+		local_irq_restore_hw_notrace(__flags); __x; })
+#define clear_bit_safe(b, a)						\
+		({ unsigned long __flags;				\
+		local_irq_save_hw_notrace(__flags);			\
+		__clear_bit(b, a);					\
+		local_irq_restore_hw_notrace(__flags); })
+#else
+#define set_bit_safe(b, a)		set_bit(b, a)
+#define test_and_set_bit_safe(b, a)	test_and_set_bit(b, a)
+#define clear_bit_safe(b, a)		clear_bit(b, a)
+#endif
+
+#define __ipipe_irq_cookie(ipd, irq)		(ipd)->irqs[irq].cookie
+#define __ipipe_irq_handler(ipd, irq)		(ipd)->irqs[irq].handler
+#define __ipipe_cpudata_irq_hits(ipd, cpu, irq)	ipipe_percpudom(ipd, irqall, cpu)[irq]
+
+extern unsigned __ipipe_printk_virq;
+
+extern unsigned long __ipipe_virtual_irq_map;
+
+extern struct list_head __ipipe_pipeline;
+
+extern int __ipipe_event_monitors[];
+
+/* Private interface */
+
+void ipipe_init(void);
+
+#ifdef CONFIG_PROC_FS
+void ipipe_init_proc(void);
+
+#ifdef CONFIG_IPIPE_TRACE
+void __ipipe_init_tracer(void);
+#else /* !CONFIG_IPIPE_TRACE */
+#define __ipipe_init_tracer()       do { } while(0)
+#endif /* CONFIG_IPIPE_TRACE */
+
+#else	/* !CONFIG_PROC_FS */
+#define ipipe_init_proc()	do { } while(0)
+#endif	/* CONFIG_PROC_FS */
+
+void __ipipe_init_stage(struct ipipe_domain *ipd);
+
+void __ipipe_cleanup_domain(struct ipipe_domain *ipd);
+
+void __ipipe_add_domain_proc(struct ipipe_domain *ipd);
+
+void __ipipe_remove_domain_proc(struct ipipe_domain *ipd);
+
+void __ipipe_flush_printk(unsigned irq, void *cookie);
+
+void fastcall __ipipe_walk_pipeline(struct list_head *pos);
+
+int fastcall __ipipe_schedule_irq(unsigned irq, struct list_head *head);
+
+int fastcall __ipipe_dispatch_event(unsigned event, void *data);
+
+int fastcall __ipipe_dispatch_wired(struct ipipe_domain *head_domain, unsigned irq);
+
+void fastcall __ipipe_sync_stage(unsigned long syncmask);
+
+void fastcall __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq);
+
+void fastcall __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq);
+
+void fastcall __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq);
+
+void __ipipe_pin_range_globally(unsigned long start, unsigned long end);
+
+/* Must be called hw IRQs off. */
+static inline void ipipe_irq_lock(unsigned irq)
+{
+	__ipipe_lock_irq(ipipe_current_domain, ipipe_processor_id(), irq);
+}
+
+/* Must be called hw IRQs off. */
+static inline void ipipe_irq_unlock(unsigned irq)
+{
+	__ipipe_unlock_irq(ipipe_current_domain, irq);
+}
+
+#ifndef __ipipe_sync_pipeline
+#define __ipipe_sync_pipeline(syncmask) __ipipe_sync_stage(syncmask)
+#endif
+
+#ifndef __ipipe_run_irqtail
+#define __ipipe_run_irqtail() do { } while(0)
+#endif
+
+#define __ipipe_pipeline_head_p(ipd) (&(ipd)->p_link == __ipipe_pipeline.next)
+
+/*
+ * Keep the following as a macro, so that client code could check for
+ * the support of the invariant pipeline head optimization.
+ */
+#define __ipipe_pipeline_head() list_entry(__ipipe_pipeline.next,struct ipipe_domain,p_link)
+
+#define __ipipe_event_monitored_p(ev) \
+	(__ipipe_event_monitors[ev] > 0 || (ipipe_current_domain->evself & (1LL << ev)))
+
+#ifdef CONFIG_SMP
+
+cpumask_t __ipipe_set_irq_affinity(unsigned irq,
+				   cpumask_t cpumask);
+
+int fastcall __ipipe_send_ipi(unsigned ipi,
+			      cpumask_t cpumask);
+
+#endif /* CONFIG_SMP */
+
+#define ipipe_sigwake_notify(p)	\
+do {					\
+	if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_SIGWAKE)) \
+		__ipipe_dispatch_event(IPIPE_EVENT_SIGWAKE,p);		\
+} while(0)
+
+#define ipipe_exit_notify(p)	\
+do {				\
+	if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_EXIT)) \
+		__ipipe_dispatch_event(IPIPE_EVENT_EXIT,p);		\
+} while(0)
+
+#define ipipe_setsched_notify(p)	\
+do {					\
+	if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_SETSCHED)) \
+		__ipipe_dispatch_event(IPIPE_EVENT_SETSCHED,p);		\
+} while(0)
+
+#define ipipe_schedule_notify(prev, next)				\
+do {									\
+	if ((((prev)->flags|(next)->flags) & PF_EVNOTIFY) &&		\
+	    __ipipe_event_monitored_p(IPIPE_EVENT_SCHEDULE))		\
+		__ipipe_dispatch_event(IPIPE_EVENT_SCHEDULE,next);	\
+} while(0)
+
+#define ipipe_trap_notify(ex, regs)		\
+({						\
+	int ret = 0;				\
+	if ((test_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpudom_var(status)) || \
+	     ((current)->flags & PF_EVNOTIFY)) &&			\
+	    __ipipe_event_monitored_p(ex))				\
+		ret = __ipipe_dispatch_event(ex, regs);			\
+	ret;								\
+})
+
+static inline void ipipe_init_notify(struct task_struct *p)
+{
+	if (__ipipe_event_monitored_p(IPIPE_EVENT_INIT))
+		__ipipe_dispatch_event(IPIPE_EVENT_INIT,p);
+}
+
+static inline void ipipe_cleanup_notify(struct mm_struct *mm)
+{
+	if (__ipipe_event_monitored_p(IPIPE_EVENT_CLEANUP))
+		__ipipe_dispatch_event(IPIPE_EVENT_CLEANUP,mm);
+}
+
+/* Public interface */
+
+int ipipe_register_domain(struct ipipe_domain *ipd,
+			  struct ipipe_domain_attr *attr);
+
+int ipipe_unregister_domain(struct ipipe_domain *ipd);
+
+void ipipe_suspend_domain(void);
+
+int ipipe_virtualize_irq(struct ipipe_domain *ipd,
+			 unsigned irq,
+			 ipipe_irq_handler_t handler,
+			 void *cookie,
+			 ipipe_irq_ackfn_t acknowledge,
+			 unsigned modemask);
+
+int ipipe_control_irq(unsigned irq,
+		      unsigned clrmask,
+		      unsigned setmask);
+
+unsigned ipipe_alloc_virq(void);
+
+int ipipe_free_virq(unsigned virq);
+
+int fastcall ipipe_trigger_irq(unsigned irq);
+
+static inline int ipipe_propagate_irq(unsigned irq)
+{
+	return __ipipe_schedule_irq(irq, ipipe_current_domain->p_link.next);
+}
+
+static inline int ipipe_schedule_irq(unsigned irq)
+{
+	return __ipipe_schedule_irq(irq, &ipipe_current_domain->p_link);
+}
+
+void fastcall ipipe_stall_pipeline_from(struct ipipe_domain *ipd);
+
+unsigned long fastcall ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd);
+
+void fastcall ipipe_unstall_pipeline_from(struct ipipe_domain *ipd);
+
+unsigned long fastcall ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd);
+
+void fastcall ipipe_restore_pipeline_from(struct ipipe_domain *ipd,
+					  unsigned long x);
+
+static inline unsigned long ipipe_test_pipeline_from(struct ipipe_domain *ipd)
+{
+	return test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
+}
+
+static inline void ipipe_stall_pipeline_head(void)
+{
+	local_irq_disable_hw();
+	__set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(__ipipe_pipeline_head(), status));
+}
+
+static inline unsigned long ipipe_test_and_stall_pipeline_head(void)
+{
+	local_irq_disable_hw();
+	return __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(__ipipe_pipeline_head(), status));
+}
+
+void ipipe_unstall_pipeline_head(void);
+
+void fastcall __ipipe_restore_pipeline_head(struct ipipe_domain *head_domain,
+					    unsigned long x);
+
+static inline void ipipe_restore_pipeline_head(unsigned long x)
+{
+	struct ipipe_domain *head_domain = __ipipe_pipeline_head();
+	/* On some archs, __test_and_set_bit() might return different
+	 * truth value than test_bit(), so we test the exclusive OR of
+	 * both statuses, assuming that the lowest bit is always set in
+	 * the truth value (if this is wrong, the failed optimization will
+	 * be caught in __ipipe_restore_pipeline_head() if
+	 * CONFIG_DEBUG_KERNEL is set). */
+	if ((x ^ test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status))) & 1)
+		__ipipe_restore_pipeline_head(head_domain, x);
+}
+
+#define ipipe_unstall_pipeline() \
+	ipipe_unstall_pipeline_from(ipipe_current_domain)
+
+#define ipipe_test_and_unstall_pipeline() \
+	ipipe_test_and_unstall_pipeline_from(ipipe_current_domain)
+
+#define ipipe_test_pipeline() \
+	ipipe_test_pipeline_from(ipipe_current_domain)
+
+#define ipipe_test_and_stall_pipeline() \
+	ipipe_test_and_stall_pipeline_from(ipipe_current_domain)
+
+#define ipipe_stall_pipeline() \
+	ipipe_stall_pipeline_from(ipipe_current_domain)
+
+#define ipipe_restore_pipeline(x) \
+	ipipe_restore_pipeline_from(ipipe_current_domain, (x))
+
+void ipipe_init_attr(struct ipipe_domain_attr *attr);
+
+int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo);
+
+unsigned long ipipe_critical_enter(void (*syncfn) (void));
+
+void ipipe_critical_exit(unsigned long flags);
+
+static inline void ipipe_set_printk_sync(struct ipipe_domain *ipd)
+{
+	set_bit(IPIPE_SPRINTK_FLAG, &ipd->flags);
+}
+
+static inline void ipipe_set_printk_async(struct ipipe_domain *ipd)
+{
+	clear_bit(IPIPE_SPRINTK_FLAG, &ipd->flags);
+}
+
+static inline void ipipe_set_foreign_stack(struct ipipe_domain *ipd)
+{
+	/* Must be called hw interrupts off. */
+	__set_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status));
+}
+
+static inline void ipipe_clear_foreign_stack(struct ipipe_domain *ipd)
+{
+	/* Must be called hw interrupts off. */
+	__clear_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status));
+}
+
+#define ipipe_safe_current()					\
+({								\
+	struct task_struct *p;					\
+	p = test_bit(IPIPE_NOSTACK_FLAG,			\
+		     &ipipe_this_cpudom_var(status)) ? &init_task : current; \
+	p; \
+})
+
+ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd,
+					unsigned event,
+					ipipe_event_handler_t handler);
+
+cpumask_t ipipe_set_irq_affinity(unsigned irq,
+				 cpumask_t cpumask);
+
+int fastcall ipipe_send_ipi(unsigned ipi,
+			    cpumask_t cpumask);
+
+int ipipe_setscheduler_root(struct task_struct *p,
+			    int policy,
+			    int prio);
+
+int ipipe_reenter_root(struct task_struct *prev,
+		       int policy,
+		       int prio);
+
+int ipipe_alloc_ptdkey(void);
+
+int ipipe_free_ptdkey(int key);
+
+int fastcall ipipe_set_ptd(int key,
+			   void *value);
+
+void fastcall *ipipe_get_ptd(int key);
+
+int ipipe_disable_ondemand_mappings(struct task_struct *tsk);
+
+#define local_irq_enable_hw_cond()		local_irq_enable_hw()
+#define local_irq_disable_hw_cond()		local_irq_disable_hw()
+#define local_irq_save_hw_cond(flags)		local_irq_save_hw(flags)
+#define local_irq_restore_hw_cond(flags)	local_irq_restore_hw(flags)
+#define local_irq_disable_head()		ipipe_stall_pipeline_head()
+
+#define local_irq_enable_nohead(ipd)			\
+	do {						\
+		if (!__ipipe_pipeline_head_p(ipd))	\
+			local_irq_enable_hw();		\
+	} while(0)
+
+#define local_irq_disable_nohead(ipd)		\
+	do {						\
+		if (!__ipipe_pipeline_head_p(ipd))	\
+			local_irq_disable_hw();		\
+	} while(0)
+
+#define ipipe_root_domain_p		(ipipe_current_domain == ipipe_root_domain)
+
+#else	/* !CONFIG_IPIPE */
+
+#define ipipe_init()			do { } while(0)
+#define ipipe_suspend_domain()		do { } while(0)
+#define ipipe_sigwake_notify(p)		do { } while(0)
+#define ipipe_setsched_notify(p)	do { } while(0)
+#define ipipe_init_notify(p)		do { } while(0)
+#define ipipe_exit_notify(p)		do { } while(0)
+#define ipipe_cleanup_notify(mm)	do { } while(0)
+#define ipipe_trap_notify(t,r)		0
+#define ipipe_init_proc()		do { } while(0)
+#define __ipipe_pin_range_globally(start, end)	do { } while(0)
+
+#define local_irq_enable_hw_cond()		do { } while(0)
+#define local_irq_disable_hw_cond()		do { } while(0)
+#define local_irq_save_hw_cond(flags)		do { (void)(flags); } while(0)
+#define local_irq_restore_hw_cond(flags)	do { } while(0)
+
+#define ipipe_irq_lock(irq)		do { } while(0)
+#define ipipe_irq_unlock(irq)		do { } while(0)
+
+#define ipipe_root_domain_p		1
+#define ipipe_safe_current		current
+
+#define local_irq_disable_head()	local_irq_disable()
+
+#endif	/* CONFIG_IPIPE */
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+
+#include <linux/cpumask.h>
+#include <asm/system.h>
+
+static inline int ipipe_disable_context_check(int cpu)
+{
+	return xchg(&per_cpu(ipipe_percpu_context_check, cpu), 0);
+}
+
+static inline void ipipe_restore_context_check(int cpu, int old_state)
+{
+	per_cpu(ipipe_percpu_context_check, cpu) = old_state;
+}
+
+static inline void ipipe_context_check_off(void)
+{
+	int cpu;
+	for_each_online_cpu(cpu)
+		per_cpu(ipipe_percpu_context_check, cpu) = 0;
+}
+
+#else	/* !CONFIG_IPIPE_DEBUG_CONTEXT */
+
+static inline int ipipe_disable_context_check(int cpu)
+{
+	return 0;
+}
+
+static inline void ipipe_restore_context_check(int cpu, int old_state) { }
+
+static inline void ipipe_context_check_off(void) { }
+
+#endif	/* !CONFIG_IPIPE_DEBUG_CONTEXT */
+
+#endif	/* !__LINUX_IPIPE_H */
Index: linux-2.6.23/include/linux/ipipe_base.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/linux/ipipe_base.h
@@ -0,0 +1,77 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_base.h
+ *
+ * Copyright (C) 2002-2007 Philippe Gerum.
+ *               2007 Jan Kiszka.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_BASE_H
+#define __LINUX_IPIPE_BASE_H
+
+#ifdef CONFIG_IPIPE
+
+#include <linux/bitops.h>
+#include <asm/ipipe_base.h>
+
+/* Number of virtual IRQs */
+#define IPIPE_NR_VIRQS		BITS_PER_LONG
+/* First virtual IRQ # */
+#define IPIPE_VIRQ_BASE		(((IPIPE_NR_XIRQS + BITS_PER_LONG - 1) / BITS_PER_LONG) * BITS_PER_LONG)
+/* Total number of IRQ slots */
+#define IPIPE_NR_IRQS		(IPIPE_VIRQ_BASE + IPIPE_NR_VIRQS)
+/* Number of indirect words needed to map the whole IRQ space. */
+#define IPIPE_IRQ_IWORDS	((IPIPE_NR_IRQS + BITS_PER_LONG - 1) / BITS_PER_LONG)
+#define IPIPE_IRQ_IMASK		(BITS_PER_LONG - 1)
+#define IPIPE_IRQMASK_ANY	(~0L)
+#define IPIPE_IRQMASK_VIRT	(IPIPE_IRQMASK_ANY << (IPIPE_VIRQ_BASE / BITS_PER_LONG))
+
+/* Per-cpu pipeline status */
+#define IPIPE_STALL_FLAG	0	/* Stalls a pipeline stage -- guaranteed at bit #0 */
+#define IPIPE_SYNC_FLAG		1	/* The interrupt syncer is running for the domain */
+#define IPIPE_NOSTACK_FLAG	2	/* Domain currently runs on a foreign stack */
+
+#define IPIPE_STALL_MASK	(1L << IPIPE_STALL_FLAG)
+#define IPIPE_SYNC_MASK		(1L << IPIPE_SYNC_FLAG)
+
+extern struct ipipe_domain ipipe_root;
+
+#define ipipe_root_domain (&ipipe_root)
+
+void __ipipe_unstall_root(void);
+
+void __ipipe_restore_root(unsigned long x);
+
+#define ipipe_preempt_disable(flags)	local_irq_save_hw(flags)
+#define ipipe_preempt_enable(flags)	local_irq_restore_hw(flags)
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+void ipipe_check_context(struct ipipe_domain *border_ipd);
+#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+static inline void ipipe_check_context(struct ipipe_domain *border_ipd) { }
+#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */
+
+#else /* !CONFIG_IPIPE */
+#define ipipe_preempt_disable(flags)	do { \
+						preempt_disable(); \
+						(void)(flags); \
+					while (0)
+#define ipipe_preempt_enable(flags)	preempt_enable()
+#define ipipe_check_context(ipd)	do { } while(0)
+#endif	/* CONFIG_IPIPE */
+
+#endif	/* !__LINUX_IPIPE_BASE_H */
Index: linux-2.6.23/include/linux/ipipe_compat.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/linux/ipipe_compat.h
@@ -0,0 +1,54 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_compat.h
+ *
+ * Copyright (C) 2007 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_COMPAT_H
+#define __LINUX_IPIPE_COMPAT_H
+
+#ifdef CONFIG_IPIPE_COMPAT
+/*
+ * OBSOLETE: defined only for backward compatibility. Will be removed
+ * in future releases, please update client code accordingly.
+ */
+
+#ifdef CONFIG_SMP
+#define ipipe_declare_cpuid	int cpuid
+#define ipipe_load_cpuid()	do { \
+					cpuid = ipipe_processor_id();	\
+				} while(0)
+#define ipipe_lock_cpu(flags)	do { \
+					local_irq_save_hw(flags); \
+					cpuid = ipipe_processor_id(); \
+				} while(0)
+#define ipipe_unlock_cpu(flags)	local_irq_restore_hw(flags)
+#define ipipe_get_cpu(flags)	ipipe_lock_cpu(flags)
+#define ipipe_put_cpu(flags)	ipipe_unlock_cpu(flags)
+#else /* !CONFIG_SMP */
+#define ipipe_declare_cpuid	const int cpuid = 0
+#define ipipe_load_cpuid()	do { } while(0)
+#define ipipe_lock_cpu(flags)	local_irq_save_hw(flags)
+#define ipipe_unlock_cpu(flags)	local_irq_restore_hw(flags)
+#define ipipe_get_cpu(flags)	do { (void)(flags); } while(0)
+#define ipipe_put_cpu(flags)	do { } while(0)
+#endif /* CONFIG_SMP */
+
+#endif /* CONFIG_IPIPE_COMPAT */
+
+#endif	/* !__LINUX_IPIPE_COMPAT_H */
Index: linux-2.6.23/include/linux/ipipe_percpu.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/linux/ipipe_percpu.h
@@ -0,0 +1,69 @@
+/*   -*- linux-c -*-
+ *   include/linux/ipipe_percpu.h
+ *
+ *   Copyright (C) 2007 Philippe Gerum.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_PERCPU_H
+#define __LINUX_IPIPE_PERCPU_H
+
+#include <asm/percpu.h>
+#include <asm/ptrace.h>
+
+struct ipipe_domain;
+
+struct ipipe_percpu_domain_data {
+	unsigned long status;	/* <= Must be first in struct. */
+	unsigned long irqpend_himask;
+	unsigned long irqpend_lomask[IPIPE_IRQ_IWORDS];
+	unsigned long irqheld_mask[IPIPE_IRQ_IWORDS];
+	unsigned long irqall[IPIPE_NR_IRQS];
+	u64 evsync;
+};
+
+#ifdef CONFIG_SMP
+#define ipipe_percpudom(ipd, var, cpu)	\
+	(per_cpu(ipipe_percpu_darray, cpu)[(ipd)->slot].var)
+#define ipipe_cpudom_var(ipd, var)	\
+	(__raw_get_cpu_var(ipipe_percpu_darray)[(ipd)->slot].var)
+#else
+DECLARE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]);
+#define ipipe_percpudom(ipd, var, cpu)	\
+	(per_cpu(ipipe_percpu_daddr, cpu)[(ipd)->slot]->var)
+#define ipipe_cpudom_var(ipd, var)	\
+	(__raw_get_cpu_var(ipipe_percpu_daddr)[(ipd)->slot]->var)
+#endif
+
+DECLARE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]);
+
+DECLARE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain);
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+DECLARE_PER_CPU(int, ipipe_percpu_context_check);
+#endif
+
+#define ipipe_percpu(var, cpu)		per_cpu(var, cpu)
+#define ipipe_cpu_var(var)		__raw_get_cpu_var(var)
+
+#define ipipe_root_cpudom_var(var)	\
+	__raw_get_cpu_var(ipipe_percpu_darray)[0].var
+
+#define ipipe_this_cpudom_var(var)	\
+	ipipe_cpudom_var(ipipe_current_domain, var)
+
+#endif	/* !__LINUX_IPIPE_PERCPU_H */
Index: linux-2.6.23/include/linux/ipipe_tickdev.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/linux/ipipe_tickdev.h
@@ -0,0 +1,55 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_tickdev.h
+ *
+ * Copyright (C) 2007 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __LINUX_IPIPE_TICKDEV_H
+#define __LINUX_IPIPE_TICKDEV_H
+
+#if defined(CONFIG_IPIPE) && defined(CONFIG_GENERIC_CLOCKEVENTS)
+
+#include <linux/clockchips.h>
+
+struct tick_device;
+
+struct ipipe_tick_device {
+
+	void (*emul_set_mode)(enum clock_event_mode,
+			      struct ipipe_tick_device *tdev);
+	int (*emul_set_tick)(unsigned long delta,
+			     struct ipipe_tick_device *tdev);
+	void (*real_set_mode)(enum clock_event_mode mode,
+			      struct clock_event_device *cdev);
+	int (*real_set_tick)(unsigned long delta,
+			     struct clock_event_device *cdev);
+	struct tick_device *slave;
+};
+
+int ipipe_request_tickdev(const char *devname,
+			  void (*emumode)(enum clock_event_mode mode,
+					  struct ipipe_tick_device *tdev),
+			  int (*emutick)(unsigned long evt,
+					 struct ipipe_tick_device *tdev),
+			  int cpu);
+
+void ipipe_release_tickdev(int cpu);
+
+#endif /* CONFIG_IPIPE && CONFIG_GENERIC_CLOCKEVENTS */
+
+#endif /* !__LINUX_IPIPE_TICKDEV_H */
Index: linux-2.6.23/include/linux/ipipe_trace.h
===================================================================
--- /dev/null
+++ linux-2.6.23/include/linux/ipipe_trace.h
@@ -0,0 +1,65 @@
+/* -*- linux-c -*-
+ * include/linux/ipipe_trace.h
+ *
+ * Copyright (C) 2005 Luotao Fu.
+ *               2005-2007 Jan Kiszka.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef _LINUX_IPIPE_TRACE_H
+#define _LINUX_IPIPE_TRACE_H
+
+#ifdef CONFIG_IPIPE
+
+#include <linux/types.h>
+
+void ipipe_trace_begin(unsigned long v);
+void ipipe_trace_end(unsigned long v);
+void ipipe_trace_freeze(unsigned long v);
+void ipipe_trace_special(unsigned char special_id, unsigned long v);
+void ipipe_trace_pid(pid_t pid, short prio);
+
+int ipipe_trace_max_reset(void);
+int ipipe_trace_frozen_reset(void);
+
+#ifdef CONFIG_IPIPE_TRACE_IRQSOFF
+#define ipipe_trace_irq_entry(irq)	ipipe_trace_begin(irq)
+#define ipipe_trace_irq_exit(irq)	ipipe_trace_end(irq)
+#define ipipe_trace_irqsoff()		ipipe_trace_begin(0x80000000UL)
+#define ipipe_trace_irqson()		ipipe_trace_end(0x80000000UL)
+#else
+#define ipipe_trace_irq_entry(irq)	do { } while(0)
+#define ipipe_trace_irq_exit(irq)	do { } while(0)
+#define ipipe_trace_irqsoff()		do { } while(0)
+#define ipipe_trace_irqson()		do { } while(0)
+#endif
+
+#endif /* CONFIG_IPIPE */
+
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+
+void ipipe_trace_panic_freeze(void);
+void ipipe_trace_panic_dump(void);
+
+#else /* !CONFIG_IPIPE_TRACE_PANIC */
+
+static inline void ipipe_trace_panic_freeze(void) { }
+static inline void ipipe_trace_panic_dump(void) { }
+
+#endif /* !CONFIG_IPIPE_TRACE_PANIC */
+
+#endif	/* !__LINUX_IPIPE_H */
Index: linux-2.6.23/include/linux/irq.h
===================================================================
--- linux-2.6.23.orig/include/linux/irq.h
+++ linux-2.6.23/include/linux/irq.h
@@ -150,6 +150,12 @@ struct irq_chip {
  * @name:		flow handler name for /proc/interrupts output
  */
 struct irq_desc {
+#ifdef CONFIG_IPIPE
+	void			fastcall (*ipipe_ack)(unsigned int irq,
+						      struct irq_desc *desc);
+	void			fastcall (*ipipe_end)(unsigned int irq,
+						      struct irq_desc *desc);
+#endif /* CONFIG_IPIPE */
 	irq_flow_handler_t	handle_irq;
 	struct irq_chip		*chip;
 	struct msi_desc		*msi_desc;
Index: linux-2.6.23/include/linux/kernel.h
===================================================================
--- linux-2.6.23.orig/include/linux/kernel.h
+++ linux-2.6.23/include/linux/kernel.h
@@ -14,6 +14,7 @@
 #include <linux/compiler.h>
 #include <linux/bitops.h>
 #include <linux/log2.h>
+#include <linux/ipipe_base.h>
 #include <asm/byteorder.h>
 #include <asm/bug.h>
 
@@ -84,9 +85,12 @@ struct user;
  */
 #ifdef CONFIG_PREEMPT_VOLUNTARY
 extern int cond_resched(void);
-# define might_resched() cond_resched()
+# define might_resched() do { \
+		ipipe_check_context(ipipe_root_domain); \
+		cond_resched(); \
+	} while (0)
 #else
-# define might_resched() do { } while (0)
+# define might_resched() ipipe_check_context(ipipe_root_domain)
 #endif
 
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
Index: linux-2.6.23/include/linux/linkage.h
===================================================================
--- linux-2.6.23.orig/include/linux/linkage.h
+++ linux-2.6.23/include/linux/linkage.h
@@ -64,4 +64,8 @@
 #define fastcall
 #endif
 
+#ifndef notrace
+#define notrace		__attribute__((no_instrument_function))
+#endif
+
 #endif
Index: linux-2.6.23/include/linux/mm.h
===================================================================
--- linux-2.6.23.orig/include/linux/mm.h
+++ linux-2.6.23/include/linux/mm.h
@@ -168,6 +168,7 @@ extern unsigned int kobjsize(const void 
 #define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
 #define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
 #define VM_ALWAYSDUMP	0x04000000	/* Always include in core dumps */
+#define VM_PINNED	0x08000000	/* Disable faults for the vma */
 
 #define VM_CAN_NONLINEAR 0x08000000	/* Has ->fault & does nonlinear pages */
 
Index: linux-2.6.23/include/linux/preempt.h
===================================================================
--- linux-2.6.23.orig/include/linux/preempt.h
+++ linux-2.6.23/include/linux/preempt.h
@@ -8,6 +8,7 @@
 
 #include <linux/thread_info.h>
 #include <linux/linkage.h>
+#include <linux/ipipe_base.h>
 #include <linux/list.h>
 
 #ifdef CONFIG_DEBUG_PREEMPT
@@ -29,18 +30,21 @@ asmlinkage void preempt_schedule(void);
 
 #define preempt_disable() \
 do { \
+	ipipe_check_context(ipipe_root_domain); \
 	inc_preempt_count(); \
 	barrier(); \
 } while (0)
 
 #define preempt_enable_no_resched() \
 do { \
+	ipipe_check_context(ipipe_root_domain); \
 	barrier(); \
 	dec_preempt_count(); \
 } while (0)
 
 #define preempt_check_resched() \
 do { \
+	ipipe_check_context(ipipe_root_domain); \
 	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
 		preempt_schedule(); \
 } while (0)
@@ -54,10 +58,10 @@ do { \
 
 #else
 
-#define preempt_disable()		do { } while (0)
-#define preempt_enable_no_resched()	do { } while (0)
-#define preempt_enable()		do { } while (0)
-#define preempt_check_resched()		do { } while (0)
+#define preempt_disable()		ipipe_check_context(ipipe_root_domain)
+#define preempt_enable_no_resched()	ipipe_check_context(ipipe_root_domain)
+#define preempt_enable()		ipipe_check_context(ipipe_root_domain)
+#define preempt_check_resched()		ipipe_check_context(ipipe_root_domain)
 
 #endif
 
Index: linux-2.6.23/include/linux/sched.h
===================================================================
--- linux-2.6.23.orig/include/linux/sched.h
+++ linux-2.6.23/include/linux/sched.h
@@ -57,6 +57,7 @@ struct sched_param {
 #include <linux/cpumask.h>
 #include <linux/errno.h>
 #include <linux/nodemask.h>
+#include <linux/ipipe.h>
 
 #include <asm/system.h>
 #include <asm/semaphore.h>
@@ -175,6 +176,13 @@ print_cfs_rq(struct seq_file *m, int cpu
 /* in tsk->state again */
 #define TASK_NONINTERACTIVE	64
 #define TASK_DEAD		128
+#ifdef CONFIG_IPIPE
+#define TASK_ATOMICSWITCH	512
+#define TASK_NOWAKEUP          1024
+#else  /* !CONFIG_IPIPE */
+#define TASK_ATOMICSWITCH	0
+#define TASK_NOWAKEUP          0
+#endif /* CONFIG_IPIPE */
 
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
@@ -1184,6 +1192,9 @@ struct task_struct {
 
 	atomic_t fs_excl;	/* holding fs exclusive resources */
 	struct rcu_head rcu;
+#ifdef CONFIG_IPIPE
+	void *ptd[IPIPE_ROOT_NPTDKEYS];
+#endif
 
 	/*
 	 * cache last used pipe for splice
@@ -1334,6 +1345,11 @@ static inline void put_task_struct(struc
 #define PF_MEMPOLICY	0x10000000	/* Non-default NUMA mempolicy */
 #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
 #define PF_FREEZER_SKIP	0x40000000	/* Freezer should not count it as freezeable */
+#ifdef CONFIG_IPIPE
+#define PF_EVNOTIFY	0x80000000	/* Notify other domains about internal events */
+#else
+#define PF_EVNOTIFY	0
+#endif /* CONFIG_IPIPE */
 
 /*
  * Only the _current_ task can read/write to tsk->flags, but other
Index: linux-2.6.23/include/linux/spinlock.h
===================================================================
--- linux-2.6.23.orig/include/linux/spinlock.h
+++ linux-2.6.23/include/linux/spinlock.h
@@ -172,7 +172,90 @@ do {								\
 #define read_trylock(lock)		__cond_lock(lock, _read_trylock(lock))
 #define write_trylock(lock)		__cond_lock(lock, _write_trylock(lock))
 
-#define spin_lock(lock)			_spin_lock(lock)
+#undef TYPE_EQUAL
+#define TYPE_EQUAL(lock, type) \
+	__builtin_types_compatible_p(typeof(lock), type *)
+
+#define PICK_SPINOP(op, lock)						\
+do {									\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t))			\
+		__raw_spin##op(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
+	else if (TYPE_EQUAL(lock, spinlock_t))				\
+		_spin##op((spinlock_t *)(lock));			\
+} while (0)
+
+#define PICK_SPINOP_RAW(op, lock)					\
+do {									\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t))			\
+		__raw_spin##op(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
+	else if (TYPE_EQUAL(lock, spinlock_t))				\
+		__raw_spin##op(&((spinlock_t *)(lock))->raw_lock);	\
+} while (0)
+
+#define PICK_SPINLOCK_IRQ(lock)						\
+do {									\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) {			\
+		__ipipe_spin_lock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
+	} else if (TYPE_EQUAL(lock, spinlock_t))			\
+		_spin_lock_irq((spinlock_t *)(lock));			\
+} while (0)
+
+#define PICK_SPINUNLOCK_IRQ(lock)					\
+do {									\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) {			\
+		__ipipe_spin_unlock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
+	} else if (TYPE_EQUAL(lock, spinlock_t))			\
+		_spin_unlock_irq((spinlock_t *)(lock));			\
+} while (0)
+
+#define PICK_SPINLOCK_IRQ_RAW(lock)					\
+do {									\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) {			\
+		__ipipe_spin_lock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
+	} else if (TYPE_EQUAL(lock, spinlock_t))			\
+		local_irq_disable();					\
+		__raw_spin_lock(&((spinlock_t *)(lock))->raw_lock);	\
+} while (0)
+
+#define PICK_SPINUNLOCK_IRQ_RAW(lock)					\
+do {									\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) {			\
+		__ipipe_spin_unlock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
+	} else if (TYPE_EQUAL(lock, spinlock_t))			\
+		__raw_spin_unlock(&((spinlock_t *)(lock))->raw_lock);	\
+		local_irq_enable();					\
+} while (0)
+
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+extern int __bad_spinlock_type(void);
+
+#define PICK_SPINLOCK_IRQSAVE(lock, flags)				\
+do {									\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) {			\
+		(flags) = __ipipe_spin_lock_irqsave(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
+	} else if (TYPE_EQUAL(lock, spinlock_t))			\
+		flags = _spin_lock_irqsave((spinlock_t *)(lock));	\
+	else __bad_spinlock_type();					\
+} while (0)
+#else
+#define PICK_SPINLOCK_IRQSAVE(lock, flags)				\
+do {									\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) {			\
+		(flags) = __ipipe_spin_lock_irqsave(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \
+	} else if (TYPE_EQUAL(lock, spinlock_t))			\
+		_spin_lock_irqsave((spinlock_t *)(lock), flags);	\
+} while (0)
+#endif
+
+#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags)				\
+	do {								\
+	if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) {			\
+		__ipipe_spin_unlock_irqrestore(&((__ipipe_spinlock_t *)(lock))->__raw_lock, flags); \
+	} else if (TYPE_EQUAL(lock, spinlock_t))			\
+		_spin_unlock_irqrestore((spinlock_t *)(lock), flags);	\
+} while (0)
+
+#define spin_lock(lock)	PICK_SPINOP(_lock, lock)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass)
@@ -185,7 +268,7 @@ do {								\
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
 
-#define spin_lock_irqsave(lock, flags)	flags = _spin_lock_irqsave(lock)
+#define spin_lock_irqsave(lock, flags)	PICK_SPINLOCK_IRQSAVE(lock, flags)
 #define read_lock_irqsave(lock, flags)	flags = _read_lock_irqsave(lock)
 #define write_lock_irqsave(lock, flags)	flags = _write_lock_irqsave(lock)
 
@@ -199,7 +282,7 @@ do {								\
 
 #else
 
-#define spin_lock_irqsave(lock, flags)	_spin_lock_irqsave(lock, flags)
+#define spin_lock_irqsave(lock, flags)	PICK_SPINLOCK_IRQSAVE(lock, flags)
 #define read_lock_irqsave(lock, flags)	_read_lock_irqsave(lock, flags)
 #define write_lock_irqsave(lock, flags)	_write_lock_irqsave(lock, flags)
 #define spin_lock_irqsave_nested(lock, flags, subclass)	\
@@ -207,7 +290,7 @@ do {								\
 
 #endif
 
-#define spin_lock_irq(lock)		_spin_lock_irq(lock)
+#define spin_lock_irq(lock)		PICK_SPINLOCK_IRQ(lock)
 #define spin_lock_bh(lock)		_spin_lock_bh(lock)
 
 #define read_lock_irq(lock)		_read_lock_irq(lock)
@@ -221,32 +304,40 @@ do {								\
  */
 #if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \
 	!defined(CONFIG_SMP)
-# define spin_unlock(lock)		_spin_unlock(lock)
+#define spin_unlock(lock)		PICK_SPINOP(_unlock, lock)
 # define read_unlock(lock)		_read_unlock(lock)
 # define write_unlock(lock)		_write_unlock(lock)
-# define spin_unlock_irq(lock)		_spin_unlock_irq(lock)
-# define read_unlock_irq(lock)		_read_unlock_irq(lock)
-# define write_unlock_irq(lock)		_write_unlock_irq(lock)
-#else
-# define spin_unlock(lock) \
-    do {__raw_spin_unlock(&(lock)->raw_lock); __release(lock); } while (0)
-# define read_unlock(lock) \
-    do {__raw_read_unlock(&(lock)->raw_lock); __release(lock); } while (0)
-# define write_unlock(lock) \
-    do {__raw_write_unlock(&(lock)->raw_lock); __release(lock); } while (0)
-# define spin_unlock_irq(lock)			\
+# define spin_unlock_irq(lock)	PICK_SPINUNLOCK_IRQ(lock)
+# define read_unlock_irq(lock)	_read_unlock_irq(lock)
+# define write_unlock_irq(lock)	_write_unlock_irq(lock)
+#else
+# define spin_unlock(lock)			\
 do {						\
-	__raw_spin_unlock(&(lock)->raw_lock);	\
+	PICK_SPINOP_RAW(_unlock, lock); 	\
+	__release(lock);			\
+} while(0)
+# define read_unlock(lock)			\
+do {						\
+	__raw_read_unlock(&(lock)->raw_lock);	\
+	__release(lock);			\
+} while (0)
+# define write_unlock(lock)			\
+do {						\
+	__raw_write_unlock(&(lock)->raw_lock);	\
 	__release(lock);			\
-	local_irq_enable();			\
 } while (0)
-# define read_unlock_irq(lock)			\
+# define spin_unlock_irq(lock)		\
+do {						\
+	PICK_SPINUNLOCK_IRQ_RAW(lock);		\
+	__release(lock);			\
+} while(0)
+# define read_unlock_irq(lock)		\
 do {						\
 	__raw_read_unlock(&(lock)->raw_lock);	\
 	__release(lock);			\
 	local_irq_enable();			\
 } while (0)
-# define write_unlock_irq(lock)			\
+# define write_unlock_irq(lock)		\
 do {						\
 	__raw_write_unlock(&(lock)->raw_lock);	\
 	__release(lock);			\
@@ -254,8 +345,8 @@ do {						\
 } while (0)
 #endif
 
-#define spin_unlock_irqrestore(lock, flags) \
-					_spin_unlock_irqrestore(lock, flags)
+#define spin_unlock_irqrestore(lock, flags)	\
+					PICK_SPINUNLOCK_IRQRESTORE(lock, flags)
 #define spin_unlock_bh(lock)		_spin_unlock_bh(lock)
 
 #define read_unlock_irqrestore(lock, flags) \
@@ -346,4 +437,29 @@ extern int _atomic_dec_and_lock(atomic_t
  */
 #define spin_can_lock(lock)	(!spin_is_locked(lock))
 
+#ifdef CONFIG_IPIPE
+void fastcall __ipipe_spin_lock_irq(raw_spinlock_t *lock);
+void fastcall __ipipe_spin_unlock_irq(raw_spinlock_t *lock);
+unsigned long fastcall __ipipe_spin_lock_irqsave(raw_spinlock_t *lock);
+void fastcall __ipipe_spin_unlock_irqrestore(raw_spinlock_t *lock,
+					     unsigned long x);
+void fastcall __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock);
+void fastcall __ipipe_spin_unlock_irqcomplete(unsigned long x);
+#define spin_lock_irqsave_cond(lock, flags) \
+	spin_lock_irqsave(lock, flags)
+#define spin_unlock_irqrestore_cond(lock, flags) \
+	spin_unlock_irqrestore(lock, flags)
+#else
+#define spin_lock_irqsave_cond(lock, flags) \
+	do { (void)(flags); spin_lock(lock); } while(0)
+#define spin_unlock_irqrestore_cond(lock, flags) \
+	spin_unlock(lock)
+#define __ipipe_spin_lock_irq(lock)		do { } while(0)
+#define __ipipe_spin_unlock_irq(lock)		do { } while(0)
+#define __ipipe_spin_lock_irqsave(lock)		0
+#define __ipipe_spin_unlock_irqrestore(lock, x)	do { (void)(x); } while(0)
+#define __ipipe_spin_unlock_irqbegin(lock)	do { } while(0)
+#define __ipipe_spin_unlock_irqcomplete(x)	do { (void)(x); } while(0)
+#endif
+
 #endif /* __LINUX_SPINLOCK_H */
Index: linux-2.6.23/include/linux/spinlock_types.h
===================================================================
--- linux-2.6.23.orig/include/linux/spinlock_types.h
+++ linux-2.6.23/include/linux/spinlock_types.h
@@ -31,6 +31,10 @@ typedef struct {
 #endif
 } spinlock_t;
 
+typedef struct {
+	raw_spinlock_t __raw_lock;
+} __ipipe_spinlock_t;
+
 #define SPINLOCK_MAGIC		0xdead4ead
 
 typedef struct {
@@ -92,9 +96,19 @@ typedef struct {
  * __SPIN_LOCK_UNLOCKED()/__RW_LOCK_UNLOCKED() as appropriate.
  */
 #define SPIN_LOCK_UNLOCKED	__SPIN_LOCK_UNLOCKED(old_style_spin_init)
+#define IPIPE_SPIN_LOCK_UNLOCKED					\
+	(__ipipe_spinlock_t) {	.__raw_lock = __RAW_SPIN_LOCK_UNLOCKED }
 #define RW_LOCK_UNLOCKED	__RW_LOCK_UNLOCKED(old_style_rw_init)
 
 #define DEFINE_SPINLOCK(x)	spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
 #define DEFINE_RWLOCK(x)	rwlock_t x = __RW_LOCK_UNLOCKED(x)
 
+#ifdef CONFIG_IPIPE
+# define ipipe_spinlock_t	__ipipe_spinlock_t
+# define IPIPE_DEFINE_SPINLOCK(x) ipipe_spinlock_t x = IPIPE_SPIN_LOCK_UNLOCKED
+#else
+# define ipipe_spinlock_t	spinlock_t
+# define IPIPE_DEFINE_SPINLOCK(x) DEFINE_SPINLOCK(x)
+#endif
+
 #endif /* __LINUX_SPINLOCK_TYPES_H */
Index: linux-2.6.23/init/Kconfig
===================================================================
--- linux-2.6.23.orig/init/Kconfig
+++ linux-2.6.23/init/Kconfig
@@ -64,6 +64,7 @@ config INIT_ENV_ARG_LIMIT
 
 config LOCALVERSION
 	string "Local version - append to kernel release"
+	default "-ipipe"
 	help
 	  Append an extra string to the end of your kernel version.
 	  This will show up when you type uname, for example.
@@ -668,3 +669,36 @@ config STOP_MACHINE
 	  Need stop_machine() primitive.
 
 source "block/Kconfig"
+
+menu "Real-time sub-system"
+
+comment "WARNING! You enabled APM, CPU Frequency scaling or ACPI 'processor'"
+	depends on APM || CPU_FREQ || ACPI_PROCESSOR
+comment "option. These options are known to cause troubles with Xenomai."
+	depends on APM || CPU_FREQ || ACPI_PROCESSOR
+
+comment "NOTE: Xenomai conflicts with PC speaker support."
+	depends on !X86_TSC && X86 && INPUT_PCSPKR
+comment "(menu Device Drivers/Input device support/Miscellaneous devices)"
+	depends on !X86_TSC && X86 && INPUT_PCSPKR
+
+comment "NOTE: Xenomai conflicts with HPET Timer support."
+	depends on !X86_LOCAL_APIC && X86 && HPET_TIMER
+comment "(menu Processor type and features/HPET Timer Support)"
+	depends on !X86_LOCAL_APIC && X86 && HPET_TIMER
+
+config XENOMAI
+	depends on ((X86_TSC || !X86 || !INPUT_PCSPKR) && (!HPET_TIMER || !X86 || X86_LOCAL_APIC))
+	bool "Xenomai"
+	default y
+        select IPIPE
+
+        help
+          Xenomai is a real-time extension to the Linux kernel. Note
+          that Xenomai relies on Adeos interrupt pipeline (CONFIG_IPIPE
+          option) to be enabled, so enabling this option selects the
+          CONFIG_IPIPE option.
+
+source "arch/i386/xenomai/Kconfig"
+
+endmenu
Index: linux-2.6.23/init/main.c
===================================================================
--- linux-2.6.23.orig/init/main.c
+++ linux-2.6.23/init/main.c
@@ -524,7 +524,7 @@ asmlinkage void __init start_kernel(void
 	unwind_init();
 	lockdep_init();
 
-	local_irq_disable();
+	local_irq_disable_hw();
 	early_boot_irqs_off();
 	early_init_irq_lock_class();
 
@@ -577,6 +577,11 @@ asmlinkage void __init start_kernel(void
 	softirq_init();
 	timekeeping_init();
 	time_init();
+	/*
+	 * We need to wait for the interrupt and time subsystems to be
+	 * initialized before enabling the pipeline.
+	 */
+ 	ipipe_init();
 	profile_init();
 	if (!irqs_disabled())
 		printk("start_kernel(): bug: interrupts were enabled early\n");
@@ -737,6 +742,7 @@ static void __init do_basic_setup(void)
 	usermodehelper_init();
 	driver_init();
 	init_irq_proc();
+ 	ipipe_init_proc();
 	do_initcalls();
 }
 
Index: linux-2.6.23/kernel/Makefile
===================================================================
--- linux-2.6.23.orig/kernel/Makefile
+++ linux-2.6.23/kernel/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RELAY) += relay.o
+obj-$(CONFIG_IPIPE) += ipipe/
 obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
@@ -74,3 +75,5 @@ quiet_cmd_ikconfiggz = IKCFG   $@
 targets += config_data.h
 $(obj)/config_data.h: $(obj)/config_data.gz FORCE
 	$(call if_changed,ikconfiggz)
+
+obj-$(CONFIG_XENOMAI) += xenomai/
Index: linux-2.6.23/kernel/exit.c
===================================================================
--- linux-2.6.23.orig/kernel/exit.c
+++ linux-2.6.23/kernel/exit.c
@@ -973,6 +973,7 @@ fastcall NORET_TYPE void do_exit(long co
 
 	if (group_dead)
 		acct_process();
+ 	ipipe_exit_notify(tsk);
 	exit_sem(tsk);
 	__exit_files(tsk);
 	__exit_fs(tsk);
Index: linux-2.6.23/kernel/fork.c
===================================================================
--- linux-2.6.23.orig/kernel/fork.c
+++ linux-2.6.23/kernel/fork.c
@@ -392,6 +392,7 @@ void mmput(struct mm_struct *mm)
 	if (atomic_dec_and_test(&mm->mm_users)) {
 		exit_aio(mm);
 		exit_mmap(mm);
+		ipipe_cleanup_notify(mm);
 		if (!list_empty(&mm->mmlist)) {
 			spin_lock(&mmlist_lock);
 			list_del(&mm->mmlist);
@@ -925,7 +926,7 @@ static inline void copy_flags(unsigned l
 {
 	unsigned long new_flags = p->flags;
 
-	new_flags &= ~PF_SUPERPRIV;
+	new_flags &= ~(PF_SUPERPRIV | PF_EVNOTIFY);
 	new_flags |= PF_FORKNOEXEC;
 	if (!(clone_flags & CLONE_PTRACE))
 		p->ptrace = 0;
@@ -1274,6 +1275,14 @@ static struct task_struct *copy_process(
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
+#ifdef CONFIG_IPIPE
+	{
+	int k;
+
+	for (k = 0; k < IPIPE_ROOT_NPTDKEYS; k++)
+		p->ptd[k] = NULL;
+	}
+#endif /* CONFIG_IPIPE */
 	return p;
 
 bad_fork_cleanup_namespaces:
Index: linux-2.6.23/kernel/ipipe/Kconfig
===================================================================
--- /dev/null
+++ linux-2.6.23/kernel/ipipe/Kconfig
@@ -0,0 +1,25 @@
+config IPIPE
+	bool "Interrupt pipeline"
+	default y
+	---help---
+	  Activate this option if you want the interrupt pipeline to be
+	  compiled in.
+
+config IPIPE_DOMAINS
+	int "Max domains"
+	depends on IPIPE
+	default 4
+	---help---
+	The maximum number of I-pipe domains to run concurrently.
+
+config IPIPE_COMPAT
+	bool "Maintain code compatibility with older releases"
+	depends on IPIPE
+	default y
+	---help---
+	Activate this option if you want the compatibility code to be
+	defined, so that older I-pipe clients may use obsolete
+	constructs. WARNING: obsolete code will be eventually
+	deprecated in future I-pipe releases, and removed from the
+	compatibility support as time passes. Please fix I-pipe
+	clients to get rid of such uses as soon as possible.
Index: linux-2.6.23/kernel/ipipe/Kconfig.debug
===================================================================
--- /dev/null
+++ linux-2.6.23/kernel/ipipe/Kconfig.debug
@@ -0,0 +1,88 @@
+config IPIPE_DEBUG
+	bool "I-pipe debugging"
+	depends on IPIPE
+
+config IPIPE_DEBUG_CONTEXT
+	bool "Check for illicit cross-domain calls"
+	depends on IPIPE_DEBUG
+	default y
+	---help---
+	  Enable this feature to arm checkpoints in the kernel that
+	  verify the correct invocation context. On entry of critical
+	  Linux services a warning is issued if the caller is not
+	  running over the root domain.
+
+config IPIPE_TRACE
+	bool "Latency tracing"
+	depends on IPIPE_DEBUG
+	select FRAME_POINTER
+	select KALLSYMS
+	select PROC_FS
+	---help---
+	  Activate this option if you want to use per-function tracing of
+	  the kernel. The tracer will collect data via instrumentation
+	  features like the one below or with the help of explicite calls
+	  of ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the
+	  in-kernel tracing API. The collected data and runtime control
+	  is available via /proc/ipipe/trace/*.
+
+if IPIPE_TRACE
+
+config IPIPE_TRACE_ENABLE
+	bool "Enable tracing on boot"
+	default y
+	---help---
+	  Disable this option if you want to arm the tracer after booting
+	  manually ("echo 1 > /proc/ipipe/tracer/enable"). This can reduce
+	  boot time on slow embedded devices due to the tracer overhead.
+
+config IPIPE_TRACE_MCOUNT
+	bool "Instrument function entries"
+	default y
+	---help---
+	  When enabled, records every kernel function entry in the tracer
+	  log. While this slows down the system noticeably, it provides
+	  the highest level of information about the flow of events.
+	  However, it can be switch off in order to record only explicit
+	  I-pipe trace points.
+
+config IPIPE_TRACE_IRQSOFF
+	bool "Trace IRQs-off times"
+	default y
+	---help---
+	  Activate this option if I-pipe shall trace the longest path
+	  with hard-IRQs switched off.
+
+config IPIPE_TRACE_SHIFT
+	int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)"
+	range 10 18
+	default 14
+	---help---
+	  The number of trace points to hold tracing data for each
+	  trace path, as a power of 2.
+
+config IPIPE_TRACE_VMALLOC
+	bool "Use vmalloc'ed trace buffer"
+	default y if EMBEDDED
+	---help---
+	  Instead of reserving static kernel data, the required buffer
+	  is allocated via vmalloc during boot-up when this option is
+	  enabled. This can help to start systems that are low on memory,
+	  but it slightly degrades overall performance. Try this option
+	  when a traced kernel hangs unexpectedly at boot time.
+
+config IPIPE_TRACE_PANIC
+	bool "Enable panic back traces"
+	default y
+	---help---
+	  Provides services to freeze and dump a back trace on panic
+	  situations. This is used on IPIPE_DEBUG_CONTEXT exceptions
+	  as well as ordinary kernel oopses. You can control the number
+	  of printed back trace points via /proc/ipipe/trace.
+
+config IPIPE_TRACE_ENABLE_VALUE
+	int
+	default 0 if !IPIPE_TRACE_ENABLE
+	default 1 if IPIPE_TRACE_ENABLE
+
+endif
Index: linux-2.6.23/kernel/ipipe/Makefile
===================================================================
--- /dev/null
+++ linux-2.6.23/kernel/ipipe/Makefile
@@ -0,0 +1,3 @@
+
+obj-$(CONFIG_IPIPE)	+= core.o
+obj-$(CONFIG_IPIPE_TRACE) += tracer.o
Index: linux-2.6.23/kernel/ipipe/core.c
===================================================================
--- /dev/null
+++ linux-2.6.23/kernel/ipipe/core.c
@@ -0,0 +1,1623 @@
+/* -*- linux-c -*-
+ * linux/kernel/ipipe/core.c
+ *
+ * Copyright (C) 2002-2005 Philippe Gerum.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Architecture-independent I-PIPE core support.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched.h>
+#include <linux/kallsyms.h>
+#include <linux/interrupt.h>
+#include <linux/bitops.h>
+#include <linux/tick.h>
+#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#endif	/* CONFIG_PROC_FS */
+#include <linux/ipipe_trace.h>
+#include <linux/ipipe_tickdev.h>
+
+static int __ipipe_ptd_key_count;
+
+static unsigned long __ipipe_ptd_key_map;
+
+static unsigned long __ipipe_domain_slot_map;
+
+struct ipipe_domain ipipe_root;
+
+#ifndef CONFIG_SMP
+/*
+ * Create an alias to the unique root status, so that arch-dep code
+ * may get simple and easy access to this percpu variable.  We also
+ * create an array of pointers to the percpu domain data; this tends
+ * to produce a better code when reaching non-root domains. We make
+ * sure that the early boot code would be able to dereference the
+ * pointer to the root domain data safely by statically initializing
+ * its value (local_irq*() routines depend on this).
+ */
+#if __GNUC__ >= 4
+extern unsigned long __ipipe_root_status
+__attribute__((alias(__stringify(__raw_get_cpu_var(ipipe_percpu_darray)))));
+EXPORT_SYMBOL(__ipipe_root_status);
+#else /* __GNUC__ < 4 */
+/*
+ * Work around a GCC 3.x issue making alias symbols unusable as
+ * constant initializers.
+ */
+unsigned long *const __ipipe_root_status_addr = &__raw_get_cpu_var(ipipe_percpu_darray);
+EXPORT_SYMBOL(__ipipe_root_status_addr);
+#endif /* __GNUC__ < 4 */
+
+DEFINE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CONFIG_IPIPE_DOMAINS]) =
+{ [0] = (struct ipipe_percpu_domain_data *)&__raw_get_cpu_var(ipipe_percpu_darray) };
+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_daddr);
+#endif /* !CONFIG_SMP */
+
+DEFINE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONFIG_IPIPE_DOMAINS]) =
+{ [0] = { .status = IPIPE_STALL_MASK } }; /* Root domain stalled on each CPU at startup. */
+
+DEFINE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain) = { &ipipe_root };
+
+static IPIPE_DEFINE_SPINLOCK(__ipipe_pipelock);
+
+LIST_HEAD(__ipipe_pipeline);
+
+unsigned long __ipipe_virtual_irq_map;
+
+#ifdef CONFIG_PRINTK
+unsigned __ipipe_printk_virq;
+#endif /* CONFIG_PRINTK */
+
+int __ipipe_event_monitors[IPIPE_NR_EVENTS];
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+
+DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
+
+static DEFINE_PER_CPU(struct ipipe_tick_device, ipipe_tick_cpu_device);
+
+static void __ipipe_set_tick_mode(enum clock_event_mode mode,
+				  struct clock_event_device *cdev)
+{
+	struct ipipe_tick_device *itd;
+	itd = &per_cpu(ipipe_tick_cpu_device, smp_processor_id());
+	itd->emul_set_mode(mode, itd);
+}
+
+static int __ipipe_set_next_tick(unsigned long evt,
+				 struct clock_event_device *cdev)
+{
+	uint64_t delta_ns = (uint64_t)cdev->delta;
+	struct ipipe_tick_device *itd;
+
+	if (delta_ns > ULONG_MAX)
+		delta_ns = ULONG_MAX;
+
+	itd = &per_cpu(ipipe_tick_cpu_device, smp_processor_id());
+	return itd->emul_set_tick((unsigned long)delta_ns, itd);
+}
+
+int ipipe_request_tickdev(const char *devname,
+			  void (*emumode)(enum clock_event_mode mode,
+					  struct ipipe_tick_device *tdev),
+			  int (*emutick)(unsigned long delta,
+					 struct ipipe_tick_device *tdev),
+			  int cpu)
+{
+	struct ipipe_tick_device *itd;
+	struct tick_device *slave;
+	unsigned long flags;
+	int status;
+
+	flags = ipipe_critical_enter(NULL);
+
+	itd = &per_cpu(ipipe_tick_cpu_device, cpu);
+
+	if (itd->slave != NULL) {
+		status = -EBUSY;
+		goto out;
+	}
+
+	slave = &per_cpu(tick_cpu_device, cpu);
+
+	if (strcmp(slave->evtdev->name, devname)) {
+		/*
+		 * No conflict so far with the current tick device,
+		 * check whether the requested device is sane and has
+		 * been blessed by the kernel.
+		 */
+		status = __ipipe_check_tickdev(devname) ?
+			CLOCK_EVT_MODE_UNUSED : CLOCK_EVT_MODE_SHUTDOWN;
+		goto out;
+	}
+
+	/*
+	 * Our caller asks for using the same clock event device for
+	 * ticking than we do, let's create a tick emulation device to
+	 * interpose on the set_next_event() method, so that we may
+	 * both manage the device in oneshot mode. Only the tick
+	 * emulation code will actually program the clockchip hardware
+	 * for the next shot, though.
+	 *
+	 * CAUTION: we still have to grab the tick device even when it
+	 * current runs in periodic mode, since the kernel may switch
+	 * to oneshot dynamically (highres/no_hz tick mode).
+	 */
+
+	itd->slave = slave;
+	itd->emul_set_mode = emumode;
+	itd->emul_set_tick = emutick;
+	itd->real_set_mode = slave->evtdev->set_mode;
+	itd->real_set_tick = slave->evtdev->set_next_event;
+	slave->evtdev->set_mode = __ipipe_set_tick_mode;
+	slave->evtdev->set_next_event = __ipipe_set_next_tick;
+	status = slave->evtdev->mode;
+out:
+	ipipe_critical_exit(flags);
+
+	return status;
+}
+
+void ipipe_release_tickdev(int cpu)
+{
+	struct ipipe_tick_device *itd;
+	struct tick_device *slave;
+	unsigned long flags;
+
+	flags = ipipe_critical_enter(NULL);
+
+	itd = &per_cpu(ipipe_tick_cpu_device, cpu);
+
+	if (itd->slave != NULL) {
+		slave = &per_cpu(tick_cpu_device, cpu);
+		slave->evtdev->set_mode = itd->real_set_mode;
+		slave->evtdev->set_next_event = itd->real_set_tick;
+		itd->slave = NULL;
+	}
+
+	ipipe_critical_exit(flags);
+}
+
+#endif /* CONFIG_GENERIC_CLOCKEVENTS */
+
+/*
+ * ipipe_init() -- Initialization routine of the IPIPE layer. Called
+ * by the host kernel early during the boot procedure.
+ */
+void __init ipipe_init(void)
+{
+	struct ipipe_domain *ipd = &ipipe_root;
+
+	__ipipe_check_platform();	/* Do platform dependent checks first. */
+
+	/*
+	 * A lightweight registration code for the root domain. We are
+	 * running on the boot CPU, hw interrupts are off, and
+	 * secondary CPUs are still lost in space.
+	 */
+
+	/* Reserve percpu data slot #0 for the root domain. */
+	ipd->slot = 0;
+	set_bit(0, &__ipipe_domain_slot_map);
+
+	ipd->name = "Linux";
+	ipd->domid = IPIPE_ROOT_ID;
+	ipd->priority = IPIPE_ROOT_PRIO;
+
+	__ipipe_init_stage(ipd);
+
+	INIT_LIST_HEAD(&ipd->p_link);
+	list_add_tail(&ipd->p_link, &__ipipe_pipeline);
+
+	__ipipe_init_platform();
+
+#ifdef CONFIG_PRINTK
+	__ipipe_printk_virq = ipipe_alloc_virq();	/* Cannot fail here. */
+	ipd->irqs[__ipipe_printk_virq].handler = &__ipipe_flush_printk;
+	ipd->irqs[__ipipe_printk_virq].cookie = NULL;
+	ipd->irqs[__ipipe_printk_virq].acknowledge = NULL;
+	ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK;
+#endif /* CONFIG_PRINTK */
+
+	__ipipe_enable_pipeline();
+
+	printk(KERN_INFO "I-pipe %s: pipeline enabled.\n",
+	       IPIPE_VERSION_STRING);
+}
+
+void __ipipe_init_stage(struct ipipe_domain *ipd)
+{
+	int cpu, n;
+
+	for_each_online_cpu(cpu) {
+
+		ipipe_percpudom(ipd, irqpend_himask, cpu) = 0;
+
+		for (n = 0; n < IPIPE_IRQ_IWORDS; n++) {
+			ipipe_percpudom(ipd, irqpend_lomask, cpu)[n] = 0;
+			ipipe_percpudom(ipd, irqheld_mask, cpu)[n] = 0;
+		}
+
+		for (n = 0; n < IPIPE_NR_IRQS; n++)
+			ipipe_percpudom(ipd, irqall, cpu)[n] = 0;
+
+		ipipe_percpudom(ipd, evsync, cpu) = 0;
+	}
+
+	for (n = 0; n < IPIPE_NR_IRQS; n++) {
+		ipd->irqs[n].acknowledge = NULL;
+		ipd->irqs[n].handler = NULL;
+		ipd->irqs[n].control = IPIPE_PASS_MASK;	/* Pass but don't handle */
+	}
+
+	for (n = 0; n < IPIPE_NR_EVENTS; n++)
+		ipd->evhand[n] = NULL;
+
+	ipd->evself = 0LL;
+	mutex_init(&ipd->mutex);
+
+	__ipipe_hook_critical_ipi(ipd);
+}
+
+void __ipipe_cleanup_domain(struct ipipe_domain *ipd)
+{
+	ipipe_unstall_pipeline_from(ipd);
+
+#ifdef CONFIG_SMP
+	{
+		int cpu;
+
+		for_each_online_cpu(cpu) {
+			while (ipipe_percpudom(ipd, irqpend_himask, cpu) != 0)
+				cpu_relax();
+		}
+	}
+#else
+	__raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = NULL;
+#endif
+
+	clear_bit(ipd->slot, &__ipipe_domain_slot_map);
+}
+
+void __ipipe_unstall_root(void)
+{
+	BUG_ON(!ipipe_root_domain_p);
+
+        local_irq_disable_hw();
+
+        __clear_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status));
+
+        if (unlikely(ipipe_root_cpudom_var(irqpend_himask) != 0))
+                __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+
+        local_irq_enable_hw();
+}
+
+void __ipipe_restore_root(unsigned long x)
+{
+	BUG_ON(!ipipe_root_domain_p);
+
+	if (x)
+		__ipipe_stall_root();
+	else
+		__ipipe_unstall_root();
+}
+
+void fastcall ipipe_stall_pipeline_from(struct ipipe_domain *ipd)
+{
+	set_bit_safe(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
+
+	if (__ipipe_pipeline_head_p(ipd))
+		local_irq_disable_hw();
+}
+
+unsigned long fastcall ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd)
+{
+	unsigned long x;
+
+	x = test_and_set_bit_safe(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
+
+	if (__ipipe_pipeline_head_p(ipd))
+		local_irq_disable_hw();
+
+	return x;
+}
+
+/*
+ * ipipe_unstall_pipeline_from() -- Unstall the pipeline and
+ * synchronize pending interrupts for a given domain. See
+ * __ipipe_walk_pipeline() for more information.
+ */
+void fastcall ipipe_unstall_pipeline_from(struct ipipe_domain *ipd)
+{
+	struct list_head *pos;
+	unsigned long flags;
+
+	local_irq_save_hw(flags);
+
+	__clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
+
+	if (ipd == ipipe_current_domain)
+		pos = &ipd->p_link;
+	else
+		pos = __ipipe_pipeline.next;
+
+	__ipipe_walk_pipeline(pos);
+
+	if (likely(__ipipe_pipeline_head_p(ipd)))
+		local_irq_enable_hw();
+	else
+		local_irq_restore_hw(flags);
+}
+
+unsigned long fastcall ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd)
+{
+	unsigned long x;
+
+	x = test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status));
+	ipipe_unstall_pipeline_from(ipd);
+
+	return x;
+}
+
+void fastcall ipipe_restore_pipeline_from(struct ipipe_domain *ipd,
+					  unsigned long x)
+{
+	if (x)
+		ipipe_stall_pipeline_from(ipd);
+	else
+		ipipe_unstall_pipeline_from(ipd);
+}
+
+void ipipe_unstall_pipeline_head(void)
+{
+	struct ipipe_domain *head_domain;
+
+	local_irq_disable_hw();
+
+	head_domain = __ipipe_pipeline_head();
+	__clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status));
+
+	if (unlikely(ipipe_cpudom_var(head_domain, irqpend_himask) != 0)) {
+		if (likely(head_domain == ipipe_current_domain))
+			__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+		else
+			__ipipe_walk_pipeline(&head_domain->p_link);
+        }
+
+	local_irq_enable_hw();
+}
+
+void fastcall __ipipe_restore_pipeline_head(struct ipipe_domain *head_domain, unsigned long x)
+{
+	local_irq_disable_hw();
+
+	if (x) {
+#ifdef CONFIG_DEBUG_KERNEL
+		static int warned;
+		if (!warned && test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status))) {
+			/*
+			 * Already stalled albeit ipipe_restore_pipeline_head()
+			 * should have detected it? Send a warning once.
+			 */
+			warned = 1;
+			printk(KERN_WARNING
+				   "I-pipe: ipipe_restore_pipeline_head() optimization failed.\n");
+			dump_stack();
+		}
+#else /* !CONFIG_DEBUG_KERNEL */
+		set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status));
+#endif /* CONFIG_DEBUG_KERNEL */
+	}
+	else {
+		__clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status));
+		if (unlikely(ipipe_cpudom_var(head_domain, irqpend_himask) != 0)) {
+			if (likely(head_domain == ipipe_current_domain))
+				__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+			else
+				__ipipe_walk_pipeline(&head_domain->p_link);
+		}
+		local_irq_enable_hw();
+	}
+}
+
+void fastcall __ipipe_spin_lock_irq(raw_spinlock_t *lock)
+{
+	local_irq_disable_hw();
+	__raw_spin_lock(lock);
+	__set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+}
+
+void fastcall __ipipe_spin_unlock_irq(raw_spinlock_t *lock)
+{
+	__raw_spin_unlock(lock);
+	__clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+	local_irq_enable_hw();
+}
+
+unsigned long fastcall __ipipe_spin_lock_irqsave(raw_spinlock_t *lock)
+{
+	unsigned long flags;
+	int s;
+
+	local_irq_save_hw(flags);
+	__raw_spin_lock(lock);
+	s = __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+
+	return raw_mangle_irq_bits(s, flags);
+}
+
+void fastcall __ipipe_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long x)
+{
+	__raw_spin_unlock(lock);
+	if (!raw_demangle_irq_bits(&x))
+		__clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+	local_irq_restore_hw(x);
+}
+
+void fastcall __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock)
+{
+	__raw_spin_unlock(&lock->__raw_lock);
+}
+
+void fastcall __ipipe_spin_unlock_irqcomplete(unsigned long x)
+{
+	if (!raw_demangle_irq_bits(&x))
+		__clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+	local_irq_restore_hw(x);
+}
+
+/* Must be called hw IRQs off. */
+void fastcall __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned irq)
+{
+	int level = irq >> IPIPE_IRQ_ISHIFT, rank = irq & IPIPE_IRQ_IMASK;
+
+	if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) {
+		__set_bit(rank, &ipipe_cpudom_var(ipd, irqpend_lomask)[level]);
+		__set_bit(level,&ipipe_cpudom_var(ipd, irqpend_himask));
+	} else
+		__set_bit(rank, &ipipe_cpudom_var(ipd, irqheld_mask)[level]);
+
+	ipipe_cpudom_var(ipd, irqall)[irq]++;
+}
+
+/* Must be called hw IRQs off. */
+void fastcall __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsigned irq)
+{
+	if (likely(!test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) {
+		int level = irq >> IPIPE_IRQ_ISHIFT, rank = irq & IPIPE_IRQ_IMASK;
+		if (__test_and_clear_bit(rank, &ipipe_percpudom(ipd, irqpend_lomask, cpu)[level]))
+			__set_bit(rank, &ipipe_cpudom_var(ipd, irqheld_mask)[level]);
+		if (ipipe_percpudom(ipd, irqpend_lomask, cpu)[level] == 0)
+			__clear_bit(level, &ipipe_percpudom(ipd, irqpend_himask, cpu));
+	}
+}
+
+/* Must be called hw IRQs off. */
+void fastcall __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq)
+{
+	int cpu;
+
+	if (likely(test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) {
+		int level = irq >> IPIPE_IRQ_ISHIFT, rank = irq & IPIPE_IRQ_IMASK;
+		for_each_online_cpu(cpu) {
+			if (test_and_clear_bit(rank, &ipipe_percpudom(ipd, irqheld_mask, cpu)[level])) {
+				/* We need atomic ops here: */
+				set_bit(rank, &ipipe_percpudom(ipd, irqpend_lomask, cpu)[level]);
+				set_bit(level, &ipipe_percpudom(ipd, irqpend_himask, cpu));
+			}
+		}
+	}
+}
+
+/* __ipipe_walk_pipeline(): Plays interrupts pending in the log. Must
+   be called with local hw interrupts disabled. */
+
+void fastcall __ipipe_walk_pipeline(struct list_head *pos)
+{
+	struct ipipe_domain *this_domain = ipipe_current_domain, *next_domain;
+
+	while (pos != &__ipipe_pipeline) {
+
+		next_domain = list_entry(pos, struct ipipe_domain, p_link);
+
+		if (test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(next_domain, status)))
+			break;	/* Stalled stage -- do not go further. */
+
+		if (ipipe_cpudom_var(next_domain, irqpend_himask) != 0) {
+
+			if (next_domain == this_domain)
+				__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+			else {
+
+				ipipe_cpudom_var(this_domain, evsync) = 0;
+				ipipe_current_domain = next_domain;
+				ipipe_suspend_domain();	/* Sync stage and propagate interrupts. */
+
+				if (ipipe_current_domain == next_domain)
+					ipipe_current_domain = this_domain;
+				/*
+				 * Otherwise, something changed the current domain under our
+				 * feet recycling the register set; do not override the new
+				 * domain.
+				 */
+
+				if (ipipe_cpudom_var(this_domain, irqpend_himask) != 0 &&
+				    !test_bit(IPIPE_STALL_FLAG,
+					      &ipipe_cpudom_var(this_domain, status)))
+					__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+			}
+
+			break;
+		} else if (next_domain == this_domain)
+			break;
+
+		pos = next_domain->p_link.next;
+	}
+}
+
+/*
+ * ipipe_suspend_domain() -- Suspend the current domain, switching to
+ * the next one which has pending work down the pipeline.
+ */
+void ipipe_suspend_domain(void)
+{
+	struct ipipe_domain *this_domain, *next_domain;
+	struct list_head *ln;
+	unsigned long flags;
+
+	local_irq_save_hw(flags);
+
+	this_domain = next_domain = ipipe_current_domain;
+
+	__clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(this_domain, status));
+
+	if (ipipe_cpudom_var(this_domain, irqpend_himask) != 0)
+		goto sync_stage;
+
+	for (;;) {
+		ln = next_domain->p_link.next;
+
+		if (ln == &__ipipe_pipeline)
+			break;
+
+		next_domain = list_entry(ln, struct ipipe_domain, p_link);
+
+		if (test_bit(IPIPE_STALL_FLAG,
+			     &ipipe_cpudom_var(next_domain, status)) != 0)
+			break;
+
+		if (ipipe_cpudom_var(next_domain, irqpend_himask) == 0)
+			continue;
+
+		ipipe_current_domain = next_domain;
+
+sync_stage:
+		__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+
+		if (ipipe_current_domain != next_domain)
+			/*
+			 * Something has changed the current domain under our
+			 * feet, recycling the register set; take note.
+			 */
+			this_domain = ipipe_current_domain;
+	}
+
+	ipipe_current_domain = this_domain;
+
+	local_irq_restore_hw(flags);
+}
+
+/* ipipe_alloc_virq() -- Allocate a pipelined virtual/soft interrupt.
+ * Virtual interrupts are handled in exactly the same way than their
+ * hw-generated counterparts wrt pipelining.
+ */
+unsigned ipipe_alloc_virq(void)
+{
+	unsigned long flags, irq = 0;
+	int ipos;
+
+	spin_lock_irqsave(&__ipipe_pipelock, flags);
+
+	if (__ipipe_virtual_irq_map != ~0) {
+		ipos = ffz(__ipipe_virtual_irq_map);
+		set_bit(ipos, &__ipipe_virtual_irq_map);
+		irq = ipos + IPIPE_VIRQ_BASE;
+	}
+
+	spin_unlock_irqrestore(&__ipipe_pipelock, flags);
+
+	return irq;
+}
+
+/* ipipe_virtualize_irq() -- Attach a handler (and optionally a hw
+   acknowledge routine) to an interrupt for a given domain. */
+
+int ipipe_virtualize_irq(struct ipipe_domain *ipd,
+			 unsigned irq,
+			 ipipe_irq_handler_t handler,
+			 void *cookie,
+			 ipipe_irq_ackfn_t acknowledge,
+			 unsigned modemask)
+{
+	unsigned long flags;
+	int err;
+
+	if (irq >= IPIPE_NR_IRQS)
+		return -EINVAL;
+
+	if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK)
+		return -EPERM;
+
+	if (!test_bit(IPIPE_AHEAD_FLAG, &ipd->flags))
+		/* Silently unwire interrupts for non-heading domains. */
+		modemask &= ~IPIPE_WIRED_MASK;
+
+	spin_lock_irqsave(&__ipipe_pipelock, flags);
+
+	if (handler != NULL) {
+		if (handler == IPIPE_SAME_HANDLER) {
+			handler = ipd->irqs[irq].handler;
+			cookie = ipd->irqs[irq].cookie;
+
+			if (handler == NULL) {
+				err = -EINVAL;
+				goto unlock_and_exit;
+			}
+		} else if ((modemask & IPIPE_EXCLUSIVE_MASK) != 0 &&
+			   ipd->irqs[irq].handler != NULL) {
+			err = -EBUSY;
+			goto unlock_and_exit;
+		}
+
+		/* Wired interrupts can only be delivered to domains
+		 * always heading the pipeline, and using dynamic
+		 * propagation. */
+
+		if ((modemask & IPIPE_WIRED_MASK) != 0) {
+			if ((modemask & (IPIPE_PASS_MASK | IPIPE_STICKY_MASK)) != 0) {
+				err = -EINVAL;
+				goto unlock_and_exit;
+			}
+			modemask |= (IPIPE_HANDLE_MASK);
+		}
+
+		if ((modemask & IPIPE_STICKY_MASK) != 0)
+			modemask |= IPIPE_HANDLE_MASK;
+	} else
+		modemask &=
+		    ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK |
+		      IPIPE_EXCLUSIVE_MASK | IPIPE_WIRED_MASK);
+
+	if (acknowledge == NULL && !ipipe_virtual_irq_p(irq))
+		/* Acknowledge handler unspecified for a hw interrupt:
+		   use the Linux-defined handler instead. */
+		acknowledge = ipipe_root_domain->irqs[irq].acknowledge;
+
+	ipd->irqs[irq].handler = handler;
+	ipd->irqs[irq].cookie = cookie;
+	ipd->irqs[irq].acknowledge = acknowledge;
+	ipd->irqs[irq].control = modemask;
+
+	if (irq < NR_IRQS && handler != NULL && !ipipe_virtual_irq_p(irq)) {
+		__ipipe_enable_irqdesc(ipd, irq);
+
+		if ((modemask & IPIPE_ENABLE_MASK) != 0) {
+			if (ipd != ipipe_current_domain) {
+				/* IRQ enable/disable state is domain-sensitive, so we may
+				   not change it for another domain. What is allowed
+				   however is forcing some domain to handle an interrupt
+				   source, by passing the proper 'ipd' descriptor which
+				   thus may be different from ipipe_current_domain. */
+				err = -EPERM;
+				goto unlock_and_exit;
+			}
+			__ipipe_enable_irq(irq);
+		}
+	}
+
+	err = 0;
+
+      unlock_and_exit:
+
+	spin_unlock_irqrestore(&__ipipe_pipelock, flags);
+
+	return err;
+}
+
+/* ipipe_control_irq() -- Change modes of a pipelined interrupt for
+ * the current domain. */
+
+int ipipe_control_irq(unsigned irq, unsigned clrmask, unsigned setmask)
+{
+	struct ipipe_domain *ipd;
+	unsigned long flags;
+
+	if (irq >= IPIPE_NR_IRQS)
+		return -EINVAL;
+
+	ipd = ipipe_current_domain;
+
+	if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK)
+		return -EPERM;
+
+	if (ipd->irqs[irq].handler == NULL)
+		setmask &= ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK);
+
+	if ((setmask & IPIPE_STICKY_MASK) != 0)
+		setmask |= IPIPE_HANDLE_MASK;
+
+	if ((clrmask & (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK)) != 0)	/* If one goes, both go. */
+		clrmask |= (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK);
+
+	spin_lock_irqsave(&__ipipe_pipelock, flags);
+
+	ipd->irqs[irq].control &= ~clrmask;
+	ipd->irqs[irq].control |= setmask;
+
+	if ((setmask & IPIPE_ENABLE_MASK) != 0)
+		__ipipe_enable_irq(irq);
+	else if ((clrmask & IPIPE_ENABLE_MASK) != 0)
+		__ipipe_disable_irq(irq);
+
+	spin_unlock_irqrestore(&__ipipe_pipelock, flags);
+
+	return 0;
+}
+
+/* __ipipe_dispatch_event() -- Low-level event dispatcher. */
+
+int fastcall __ipipe_dispatch_event (unsigned event, void *data)
+{
+	struct ipipe_domain *start_domain, *this_domain, *next_domain;
+	ipipe_event_handler_t evhand;
+	struct list_head *pos, *npos;
+	unsigned long flags;
+	int propagate = 1;
+
+	local_irq_save_hw(flags);
+
+	start_domain = this_domain = ipipe_current_domain;
+
+	list_for_each_safe(pos, npos, &__ipipe_pipeline) {
+		/*
+		 * Note: Domain migration may occur while running
+		 * event or interrupt handlers, in which case the
+		 * current register set is going to be recycled for a
+		 * different domain than the initiating one. We do
+		 * care for that, always tracking the current domain
+		 * descriptor upon return from those handlers.
+		 */
+		next_domain = list_entry(pos, struct ipipe_domain, p_link);
+
+		/*
+		 * Keep a cached copy of the handler's address since
+		 * ipipe_catch_event() may clear it under our feet.
+		 */
+		evhand = next_domain->evhand[event];
+
+		if (evhand != NULL) {
+			ipipe_current_domain = next_domain;
+			ipipe_cpudom_var(next_domain, evsync) |= (1LL << event);
+			local_irq_restore_hw(flags);
+			propagate = !evhand(event, start_domain, data);
+			local_irq_save_hw(flags);
+			ipipe_cpudom_var(next_domain, evsync) &= ~(1LL << event);
+			if (ipipe_current_domain != next_domain)
+				this_domain = ipipe_current_domain;
+		}
+
+		if (next_domain != ipipe_root_domain &&	/* NEVER sync the root stage here. */
+		    ipipe_cpudom_var(next_domain, irqpend_himask) != 0 &&
+		    !test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(next_domain, status))) {
+			ipipe_current_domain = next_domain;
+			__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+			if (ipipe_current_domain != next_domain)
+				this_domain = ipipe_current_domain;
+		}
+
+		ipipe_current_domain = this_domain;
+
+		if (next_domain == this_domain || !propagate)
+			break;
+	}
+
+	local_irq_restore_hw(flags);
+
+	return !propagate;
+}
+
+/*
+ * __ipipe_dispatch_wired -- Wired interrupt dispatcher. Wired
+ * interrupts are immediately and unconditionally delivered to the
+ * domain heading the pipeline upon receipt, and such domain must have
+ * been registered as an invariant head for the system (priority ==
+ * IPIPE_HEAD_PRIORITY). The motivation for using wired interrupts is
+ * to get an extra-fast dispatching path for those IRQs, by relying on
+ * a straightforward logic based on assumptions that must always be
+ * true for invariant head domains.  The following assumptions are
+ * made when dealing with such interrupts:
+ *
+ * 1- Wired interrupts are purely dynamic, i.e. the decision to
+ * propagate them down the pipeline must be done from the head domain
+ * ISR.
+ * 2- Wired interrupts cannot be shared or sticky.
+ * 3- The root domain cannot be an invariant pipeline head, in
+ * consequence of what the root domain cannot handle wired
+ * interrupts.
+ * 4- Wired interrupts must have a valid acknowledge handler for the
+ * head domain (if needed), and in any case, must not rely on handlers
+ * provided by lower priority domains during the acknowledge cycle
+ * (see __ipipe_handle_irq).
+ *
+ * Called with hw interrupts off.
+ */
+
+int fastcall __ipipe_dispatch_wired(struct ipipe_domain *head_domain, unsigned irq)
+{
+	struct ipipe_domain *old;
+
+	if (test_bit(IPIPE_LOCK_FLAG, &head_domain->irqs[irq].control)) {
+		/* If we can't process this IRQ right now, we must
+		 * mark it as held, so that it will get played during
+		 * normal log sync when the corresponding interrupt
+		 * source is eventually unlocked. */
+		ipipe_cpudom_var(head_domain, irqall)[irq]++;
+		__set_bit(irq & IPIPE_IRQ_IMASK, &ipipe_cpudom_var(head_domain, irqheld_mask)[irq >> IPIPE_IRQ_ISHIFT]);
+		return 0;
+	}
+
+	if (test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status))) {
+		__ipipe_set_irq_pending(head_domain, irq);
+		return 0;
+	}
+
+	old = ipipe_current_domain;
+	ipipe_current_domain = head_domain; /* Switch to the head domain. */
+
+	ipipe_cpudom_var(head_domain, irqall)[irq]++;
+	__set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status));
+	head_domain->irqs[irq].handler(irq, head_domain->irqs[irq].cookie); /* Call the ISR. */
+	__ipipe_run_irqtail();
+	__clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status));
+
+	/* We expect the caller to start a complete pipeline walk upon
+	 * return, so that propagated interrupts will get played. */
+
+	if (ipipe_current_domain == head_domain)
+		ipipe_current_domain = old; /* Back to the preempted domain. */
+
+	return 1;
+}
+
+/*
+ * __ipipe_sync_stage() -- Flush the pending IRQs for the current
+ * domain (and processor). This routine flushes the interrupt log
+ * (see "Optimistic interrupt protection" from D. Stodolsky et al. for
+ * more on the deferred interrupt scheme). Every interrupt that
+ * occurred while the pipeline was stalled gets played. WARNING:
+ * callers on SMP boxen should always check for CPU migration on
+ * return of this routine. One can control the kind of interrupts
+ * which are going to be sync'ed using the syncmask
+ * parameter. IPIPE_IRQMASK_ANY plays them all, IPIPE_IRQMASK_VIRT
+ * plays virtual interrupts only.
+ *
+ * This routine must be called with hw interrupts off.
+ */
+void fastcall __ipipe_sync_stage(unsigned long syncmask)
+{
+	unsigned long mask, submask;
+	struct ipipe_domain *ipd;
+	int level, rank, cpu;
+	unsigned irq;
+
+	if (__test_and_set_bit(IPIPE_SYNC_FLAG, &ipipe_this_cpudom_var(status)))
+		return;
+
+	ipd = ipipe_current_domain;
+	cpu = ipipe_processor_id();
+
+	/*
+	 * The policy here is to keep the dispatching code interrupt-free
+	 * by stalling the current stage. If the upper domain handler
+	 * (which we call) wants to re-enable interrupts while in a safe
+	 * portion of the code (e.g. SA_INTERRUPT flag unset for Linux's
+	 * sigaction()), it will have to unstall (then stall again before
+	 * returning to us!) the stage when it sees fit.
+	 */
+	while ((mask = (ipipe_this_cpudom_var(irqpend_himask) & syncmask)) != 0) {
+		level = __ipipe_ffnz(mask);
+
+		while ((submask = ipipe_this_cpudom_var(irqpend_lomask)[level]) != 0) {
+			rank = __ipipe_ffnz(submask);
+			irq = (level << IPIPE_IRQ_ISHIFT) + rank;
+
+			if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) {
+				__clear_bit(rank, &ipipe_this_cpudom_var(irqpend_lomask)[level]);
+				continue;
+			}
+
+			__clear_bit(rank, &ipipe_this_cpudom_var(irqpend_lomask)[level]);
+
+			if (ipipe_this_cpudom_var(irqpend_lomask)[level] == 0)
+				__clear_bit(level, &ipipe_this_cpudom_var(irqpend_himask));
+
+			__set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+
+			if (ipd == ipipe_root_domain)
+				trace_hardirqs_off();
+
+			__ipipe_run_isr(ipd, irq);
+#ifdef CONFIG_SMP
+			{
+				int newcpu = ipipe_processor_id();
+
+				if (newcpu != cpu) {	/* Handle CPU migration. */
+					/*
+					 * We expect any domain to clear the SYNC bit each
+					 * time it switches in a new task, so that preemptions
+					 * and/or CPU migrations (in the SMP case) over the
+					 * ISR do not lock out the log syncer for some
+					 * indefinite amount of time. In the Linux case,
+					 * schedule() handles this (see kernel/sched.c). For
+					 * this reason, we don't bother clearing it here for
+					 * the source CPU in the migration handling case,
+					 * since it must have scheduled another task in by
+					 * now.
+					 */
+					__set_bit(IPIPE_SYNC_FLAG, &ipipe_this_cpudom_var(status));
+					cpu = newcpu;
+				}
+			}
+#endif	/* CONFIG_SMP */
+			if (ipd == ipipe_root_domain &&
+			    test_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)))
+				trace_hardirqs_on();
+
+			__clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status));
+		}
+	}
+
+	__clear_bit(IPIPE_SYNC_FLAG, &ipipe_this_cpudom_var(status));
+}
+
+/* ipipe_register_domain() -- Link a new domain to the pipeline. */
+
+int ipipe_register_domain(struct ipipe_domain *ipd,
+			  struct ipipe_domain_attr *attr)
+{
+	struct ipipe_domain *_ipd;
+	struct list_head *pos;
+	unsigned long flags;
+
+	if (!ipipe_root_domain_p) {
+		printk(KERN_WARNING
+		       "I-pipe: Only the root domain may register a new domain.\n");
+		return -EPERM;
+	}
+
+	if (attr->priority == IPIPE_HEAD_PRIORITY &&
+	    test_bit(IPIPE_AHEAD_FLAG,&__ipipe_pipeline_head()->flags))
+		return -EAGAIN;	/* Cannot override current head. */
+
+	flags = ipipe_critical_enter(NULL);
+
+	pos = NULL;
+	ipd->slot = ffz(__ipipe_domain_slot_map);
+
+	if (ipd->slot < CONFIG_IPIPE_DOMAINS) {
+		set_bit(ipd->slot, &__ipipe_domain_slot_map);
+		list_for_each(pos, &__ipipe_pipeline) {
+			_ipd = list_entry(pos, struct ipipe_domain, p_link);
+			if (_ipd->domid == attr->domid)
+				break;
+		}
+	}
+
+	ipipe_critical_exit(flags);
+
+	if (pos != &__ipipe_pipeline) {
+		if (ipd->slot < CONFIG_IPIPE_DOMAINS)
+			clear_bit(ipd->slot, &__ipipe_domain_slot_map);
+		return -EBUSY;
+	}
+
+#ifndef CONFIG_SMP
+	/*
+	 * Set up the perdomain pointers for direct access to the
+	 * percpu domain data. This saves a costly multiply each time
+	 * we need to refer to the contents of the percpu domain data
+	 * array.
+	 */
+	__raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] = &__raw_get_cpu_var(ipipe_percpu_darray)[ipd->slot];
+#endif
+
+	ipd->name = attr->name;
+	ipd->domid = attr->domid;
+	ipd->pdd = attr->pdd;
+	ipd->flags = 0;
+
+	if (attr->priority == IPIPE_HEAD_PRIORITY) {
+		ipd->priority = INT_MAX;
+		__set_bit(IPIPE_AHEAD_FLAG,&ipd->flags);
+	}
+	else
+		ipd->priority = attr->priority;
+
+	__ipipe_init_stage(ipd);
+
+	INIT_LIST_HEAD(&ipd->p_link);
+
+#ifdef CONFIG_PROC_FS
+	__ipipe_add_domain_proc(ipd);
+#endif /* CONFIG_PROC_FS */
+
+	flags = ipipe_critical_enter(NULL);
+
+	list_for_each(pos, &__ipipe_pipeline) {
+		_ipd = list_entry(pos, struct ipipe_domain, p_link);
+		if (ipd->priority > _ipd->priority)
+			break;
+	}
+
+	list_add_tail(&ipd->p_link, pos);
+
+	ipipe_critical_exit(flags);
+
+	printk(KERN_INFO "I-pipe: Domain %s registered.\n", ipd->name);
+
+	/*
+	 * Finally, allow the new domain to perform its initialization
+	 * chores.
+	 */
+
+	if (attr->entry != NULL) {
+		ipipe_current_domain = ipd;
+		attr->entry();
+		ipipe_current_domain = ipipe_root_domain;
+
+		local_irq_save_hw(flags);
+
+		if (ipipe_root_cpudom_var(irqpend_himask) != 0 &&
+		    !test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
+			__ipipe_sync_pipeline(IPIPE_IRQMASK_ANY);
+
+		local_irq_restore_hw(flags);
+	}
+
+	return 0;
+}
+
+/* ipipe_unregister_domain() -- Remove a domain from the pipeline. */
+
+int ipipe_unregister_domain(struct ipipe_domain *ipd)
+{
+	unsigned long flags;
+
+	if (!ipipe_root_domain_p) {
+		printk(KERN_WARNING
+		       "I-pipe: Only the root domain may unregister a domain.\n");
+		return -EPERM;
+	}
+
+	if (ipd == ipipe_root_domain) {
+		printk(KERN_WARNING
+		       "I-pipe: Cannot unregister the root domain.\n");
+		return -EPERM;
+	}
+#ifdef CONFIG_SMP
+	{
+		unsigned irq;
+		int cpu;
+
+		/*
+		 * In the SMP case, wait for the logged events to drain on
+		 * other processors before eventually removing the domain
+		 * from the pipeline.
+		 */
+
+		ipipe_unstall_pipeline_from(ipd);
+
+		flags = ipipe_critical_enter(NULL);
+
+		for (irq = 0; irq < IPIPE_NR_IRQS; irq++) {
+			clear_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control);
+			clear_bit(IPIPE_STICKY_FLAG, &ipd->irqs[irq].control);
+			set_bit(IPIPE_PASS_FLAG, &ipd->irqs[irq].control);
+		}
+
+		ipipe_critical_exit(flags);
+
+		for_each_online_cpu(cpu) {
+			while (ipipe_percpudom(ipd, irqpend_himask, cpu) > 0)
+				cpu_relax();
+		}
+	}
+#endif	/* CONFIG_SMP */
+
+	mutex_lock(&ipd->mutex);
+
+#ifdef CONFIG_PROC_FS
+	__ipipe_remove_domain_proc(ipd);
+#endif /* CONFIG_PROC_FS */
+
+	/*
+	 * Simply remove the domain from the pipeline and we are almost done.
+	 */
+
+	flags = ipipe_critical_enter(NULL);
+	list_del_init(&ipd->p_link);
+	ipipe_critical_exit(flags);
+
+	__ipipe_cleanup_domain(ipd);
+
+	mutex_unlock(&ipd->mutex);
+
+	printk(KERN_INFO "I-pipe: Domain %s unregistered.\n", ipd->name);
+
+	return 0;
+}
+
+/*
+ * ipipe_propagate_irq() -- Force a given IRQ propagation on behalf of
+ * a running interrupt handler to the next domain down the pipeline.
+ * ipipe_schedule_irq() -- Does almost the same as above, but attempts
+ * to pend the interrupt for the current domain first.
+ */
+int fastcall __ipipe_schedule_irq(unsigned irq, struct list_head *head)
+{
+	struct ipipe_domain *ipd;
+	struct list_head *ln;
+	unsigned long flags;
+
+	if (irq >= IPIPE_NR_IRQS ||
+	    (ipipe_virtual_irq_p(irq)
+	     && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)))
+		return -EINVAL;
+
+	local_irq_save_hw(flags);
+
+	ln = head;
+
+	while (ln != &__ipipe_pipeline) {
+
+		ipd = list_entry(ln, struct ipipe_domain, p_link);
+
+		if (test_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control)) {
+			__ipipe_set_irq_pending(ipd, irq);
+			local_irq_restore_hw(flags);
+			return 1;
+		}
+
+		ln = ipd->p_link.next;
+	}
+
+	local_irq_restore_hw(flags);
+
+	return 0;
+}
+
+/* ipipe_free_virq() -- Release a virtual/soft interrupt. */
+
+int ipipe_free_virq(unsigned virq)
+{
+	if (!ipipe_virtual_irq_p(virq))
+		return -EINVAL;
+
+	clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map);
+
+	return 0;
+}
+
+void ipipe_init_attr(struct ipipe_domain_attr *attr)
+{
+	attr->name = "anon";
+	attr->domid = 1;
+	attr->entry = NULL;
+	attr->priority = IPIPE_ROOT_PRIO;
+	attr->pdd = NULL;
+}
+
+/*
+ * ipipe_catch_event() -- Interpose or remove an event handler for a
+ * given domain.
+ */
+ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd,
+					unsigned event,
+					ipipe_event_handler_t handler)
+{
+	ipipe_event_handler_t old_handler;
+	unsigned long flags;
+	int self = 0, cpu;
+
+	if (event & IPIPE_EVENT_SELF) {
+		event &= ~IPIPE_EVENT_SELF;
+		self = 1;
+	}
+
+	if (event >= IPIPE_NR_EVENTS)
+		return NULL;
+
+	flags = ipipe_critical_enter(NULL);
+
+	if (!(old_handler = xchg(&ipd->evhand[event],handler)))	{
+		if (handler) {
+			if (self)
+				ipd->evself |= (1LL << event);
+			else
+				__ipipe_event_monitors[event]++;
+		}
+	}
+	else if (!handler) {
+		if (ipd->evself & (1LL << event))
+			ipd->evself &= ~(1LL << event);
+		else
+			__ipipe_event_monitors[event]--;
+	} else if ((ipd->evself & (1LL << event)) && !self) {
+			__ipipe_event_monitors[event]++;
+			ipd->evself &= ~(1LL << event);
+	} else if (!(ipd->evself & (1LL << event)) && self) {
+			__ipipe_event_monitors[event]--;
+			ipd->evself |= (1LL << event);
+	}
+
+	ipipe_critical_exit(flags);
+
+	if (!handler && ipipe_root_domain_p) {
+		/*
+		 * If we cleared a handler on behalf of the root
+		 * domain, we have to wait for any current invocation
+		 * to drain, since our caller might subsequently unmap
+		 * the target domain. To this aim, this code
+		 * synchronizes with __ipipe_dispatch_event(),
+		 * guaranteeing that either the dispatcher sees a null
+		 * handler in which case it discards the invocation
+		 * (which also prevents from entering a livelock), or
+		 * finds a valid handler and calls it. Symmetrically,
+		 * ipipe_catch_event() ensures that the called code
+		 * won't be unmapped under our feet until the event
+		 * synchronization flag is cleared for the given event
+		 * on all CPUs.
+		 */
+
+		for_each_online_cpu(cpu) {
+			while (ipipe_percpudom(ipd, evsync, cpu) & (1LL << event))
+				schedule_timeout_interruptible(HZ / 50);
+		}
+	}
+
+	return old_handler;
+}
+
+cpumask_t ipipe_set_irq_affinity (unsigned irq, cpumask_t cpumask)
+{
+#ifdef CONFIG_SMP
+	if (irq >= IPIPE_NR_XIRQS)
+		/* Allow changing affinity of external IRQs only. */
+		return CPU_MASK_NONE;
+
+	if (num_online_cpus() > 1)
+		return __ipipe_set_irq_affinity(irq,cpumask);
+#endif /* CONFIG_SMP */
+
+	return CPU_MASK_NONE;
+}
+
+int fastcall ipipe_send_ipi (unsigned ipi, cpumask_t cpumask)
+
+{
+#ifdef CONFIG_SMP
+	return __ipipe_send_ipi(ipi,cpumask);
+#else /* !CONFIG_SMP */
+	return -EINVAL;
+#endif /* CONFIG_SMP */
+}
+
+int ipipe_alloc_ptdkey (void)
+{
+	unsigned long flags;
+	int key = -1;
+
+	spin_lock_irqsave(&__ipipe_pipelock,flags);
+
+	if (__ipipe_ptd_key_count < IPIPE_ROOT_NPTDKEYS) {
+		key = ffz(__ipipe_ptd_key_map);
+		set_bit(key,&__ipipe_ptd_key_map);
+		__ipipe_ptd_key_count++;
+	}
+
+	spin_unlock_irqrestore(&__ipipe_pipelock,flags);
+
+	return key;
+}
+
+int ipipe_free_ptdkey (int key)
+{
+	unsigned long flags;
+
+	if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
+		return -EINVAL;
+
+	spin_lock_irqsave(&__ipipe_pipelock,flags);
+
+	if (test_and_clear_bit(key,&__ipipe_ptd_key_map))
+		__ipipe_ptd_key_count--;
+
+	spin_unlock_irqrestore(&__ipipe_pipelock,flags);
+
+	return 0;
+}
+
+int fastcall ipipe_set_ptd (int key, void *value)
+
+{
+	if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
+		return -EINVAL;
+
+	current->ptd[key] = value;
+
+	return 0;
+}
+
+void fastcall *ipipe_get_ptd (int key)
+
+{
+	if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS)
+		return NULL;
+
+	return current->ptd[key];
+}
+
+#ifdef CONFIG_PROC_FS
+
+struct proc_dir_entry *ipipe_proc_root;
+
+static int __ipipe_version_info_proc(char *page,
+				     char **start,
+				     off_t off, int count, int *eof, void *data)
+{
+	int len = sprintf(page, "%s\n", IPIPE_VERSION_STRING);
+
+	len -= off;
+
+	if (len <= off + count)
+		*eof = 1;
+
+	*start = page + off;
+
+	if(len > count)
+		len = count;
+
+	if(len < 0)
+		len = 0;
+
+	return len;
+}
+
+static int __ipipe_common_info_show(struct seq_file *p, void *data)
+{
+	struct ipipe_domain *ipd = (struct ipipe_domain *)p->private;
+	char handling, stickiness, lockbit, exclusive, virtuality;
+
+	unsigned long ctlbits;
+	unsigned irq;
+
+	seq_printf(p, "       +----- Handling ([A]ccepted, [G]rabbed, [W]ired, [D]iscarded)\n");
+	seq_printf(p, "       |+---- Sticky\n");
+	seq_printf(p, "       ||+--- Locked\n");
+	seq_printf(p, "       |||+-- Exclusive\n");
+	seq_printf(p, "       ||||+- Virtual\n");
+	seq_printf(p, "[IRQ]  |||||\n");
+
+	mutex_lock(&ipd->mutex);
+
+	for (irq = 0; irq < IPIPE_NR_IRQS; irq++) {
+		/* Remember to protect against
+		 * ipipe_virtual_irq/ipipe_control_irq if more fields
+		 * get involved. */
+		ctlbits = ipd->irqs[irq].control;
+
+		if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq))
+			/*
+			 * There might be a hole between the last external
+			 * IRQ and the first virtual one; skip it.
+			 */
+			continue;
+
+		if (ipipe_virtual_irq_p(irq)
+		    && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))
+			/* Non-allocated virtual IRQ; skip it. */
+			continue;
+
+		/*
+		 * Statuses are as follows:
+		 * o "accepted" means handled _and_ passed down the pipeline.
+		 * o "grabbed" means handled, but the interrupt might be
+		 * terminated _or_ passed down the pipeline depending on
+		 * what the domain handler asks for to the I-pipe.
+		 * o "wired" is basically the same as "grabbed", except that
+		 * the interrupt is unconditionally delivered to an invariant
+		 * pipeline head domain.
+		 * o "passed" means unhandled by the domain but passed
+		 * down the pipeline.
+		 * o "discarded" means unhandled and _not_ passed down the
+		 * pipeline. The interrupt merely disappears from the
+		 * current domain down to the end of the pipeline.
+		 */
+		if (ctlbits & IPIPE_HANDLE_MASK) {
+			if (ctlbits & IPIPE_PASS_MASK)
+				handling = 'A';
+			else if (ctlbits & IPIPE_WIRED_MASK)
+				handling = 'W';
+			else
+				handling = 'G';
+		} else if (ctlbits & IPIPE_PASS_MASK)
+			/* Do not output if no major action is taken. */
+			continue;
+		else
+			handling = 'D';
+
+		if (ctlbits & IPIPE_STICKY_MASK)
+			stickiness = 'S';
+		else
+			stickiness = '.';
+
+		if (ctlbits & IPIPE_LOCK_MASK)
+			lockbit = 'L';
+		else
+			lockbit = '.';
+
+		if (ctlbits & IPIPE_EXCLUSIVE_MASK)
+			exclusive = 'X';
+		else
+			exclusive = '.';
+
+		if (ipipe_virtual_irq_p(irq))
+			virtuality = 'V';
+		else
+			virtuality = '.';
+
+		seq_printf(p, " %3u:  %c%c%c%c%c\n",
+			     irq, handling, stickiness, lockbit, exclusive, virtuality);
+	}
+
+	seq_printf(p, "[Domain info]\n");
+
+	seq_printf(p, "id=0x%.8x\n", ipd->domid);
+
+	if (test_bit(IPIPE_AHEAD_FLAG,&ipd->flags))
+		seq_printf(p, "priority=topmost\n");
+	else
+		seq_printf(p, "priority=%d\n", ipd->priority);
+
+	mutex_unlock(&ipd->mutex);
+
+	return 0;
+}
+
+static int __ipipe_common_info_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, __ipipe_common_info_show, PROC_I(inode)->pde->data);
+}
+
+static struct file_operations __ipipe_info_proc_ops = {
+	.owner		= THIS_MODULE,
+	.open		= __ipipe_common_info_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+void __ipipe_add_domain_proc(struct ipipe_domain *ipd)
+{
+	struct proc_dir_entry *e = create_proc_entry(ipd->name, 0444, ipipe_proc_root);
+	if (e) {
+		e->proc_fops = &__ipipe_info_proc_ops;
+		e->data = (void*) ipd;
+	}
+}
+
+void __ipipe_remove_domain_proc(struct ipipe_domain *ipd)
+{
+	remove_proc_entry(ipd->name,ipipe_proc_root);
+}
+
+void __init ipipe_init_proc(void)
+{
+	ipipe_proc_root = create_proc_entry("ipipe",S_IFDIR, 0);
+	create_proc_read_entry("version",0444,ipipe_proc_root,&__ipipe_version_info_proc,NULL);
+	__ipipe_add_domain_proc(ipipe_root_domain);
+
+	__ipipe_init_tracer();
+}
+
+#endif	/* CONFIG_PROC_FS */
+
+#ifdef CONFIG_IPIPE_DEBUG_CONTEXT
+
+DEFINE_PER_CPU(int, ipipe_percpu_context_check) = { 1 };
+
+void ipipe_check_context(struct ipipe_domain *border_ipd)
+{
+	/* Note: We don't make the per_cpu access atomic. We assume that code
+	   which temporarily disables the check does this in atomic context
+	   only. */
+	if (likely(ipipe_current_domain->priority <= border_ipd->priority) ||
+	    !per_cpu(ipipe_percpu_context_check, ipipe_processor_id()))
+		return;
+
+	ipipe_context_check_off();
+
+	ipipe_trace_panic_freeze();
+	ipipe_set_printk_sync(ipipe_current_domain);
+	printk(KERN_ERR "I-pipe: Detected illicit call from domain '%s'\n"
+	       KERN_ERR "        into a service reserved for domain '%s' and "
+			"below.\n",
+	       ipipe_current_domain->name, border_ipd->name);
+	show_stack(NULL, NULL);
+	ipipe_trace_panic_dump();
+}
+
+EXPORT_SYMBOL(ipipe_check_context);
+#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */
+
+EXPORT_SYMBOL(ipipe_virtualize_irq);
+EXPORT_SYMBOL(ipipe_control_irq);
+EXPORT_SYMBOL(ipipe_suspend_domain);
+EXPORT_SYMBOL(ipipe_alloc_virq);
+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_domain);
+EXPORT_PER_CPU_SYMBOL(ipipe_percpu_darray);
+EXPORT_SYMBOL(ipipe_root);
+EXPORT_SYMBOL(ipipe_stall_pipeline_from);
+EXPORT_SYMBOL(ipipe_test_and_stall_pipeline_from);
+EXPORT_SYMBOL(ipipe_unstall_pipeline_from);
+EXPORT_SYMBOL(ipipe_restore_pipeline_from);
+EXPORT_SYMBOL(ipipe_test_and_unstall_pipeline_from);
+EXPORT_SYMBOL(ipipe_unstall_pipeline_head);
+EXPORT_SYMBOL(__ipipe_restore_pipeline_head);
+EXPORT_SYMBOL(__ipipe_unstall_root);
+EXPORT_SYMBOL(__ipipe_restore_root);
+EXPORT_SYMBOL(__ipipe_spin_lock_irq);
+EXPORT_SYMBOL(__ipipe_spin_unlock_irq);
+EXPORT_SYMBOL(__ipipe_spin_lock_irqsave);
+EXPORT_SYMBOL(__ipipe_spin_unlock_irqrestore);
+EXPORT_SYMBOL(__ipipe_pipeline);
+EXPORT_SYMBOL(__ipipe_lock_irq);
+EXPORT_SYMBOL(__ipipe_unlock_irq);
+EXPORT_SYMBOL(ipipe_register_domain);
+EXPORT_SYMBOL(ipipe_unregister_domain);
+EXPORT_SYMBOL(ipipe_free_virq);
+EXPORT_SYMBOL(ipipe_init_attr);
+EXPORT_SYMBOL(ipipe_catch_event);
+EXPORT_SYMBOL(ipipe_alloc_ptdkey);
+EXPORT_SYMBOL(ipipe_free_ptdkey);
+EXPORT_SYMBOL(ipipe_set_ptd);
+EXPORT_SYMBOL(ipipe_get_ptd);
+EXPORT_SYMBOL(ipipe_set_irq_affinity);
+EXPORT_SYMBOL(ipipe_send_ipi);
+EXPORT_SYMBOL(__ipipe_schedule_irq);
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+EXPORT_SYMBOL(ipipe_request_tickdev);
+EXPORT_SYMBOL(ipipe_release_tickdev);
+#endif
Index: linux-2.6.23/kernel/ipipe/tracer.c
===================================================================
--- /dev/null
+++ linux-2.6.23/kernel/ipipe/tracer.c
@@ -0,0 +1,1342 @@
+/* -*- linux-c -*-
+ * kernel/ipipe/tracer.c
+ *
+ * Copyright (C) 2005 Luotao Fu.
+ *               2005-2007 Jan Kiszka.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/ctype.h>
+#include <linux/vmalloc.h>
+#include <linux/utsrelease.h>
+#include <linux/sched.h>
+#include <linux/ipipe.h>
+#include <asm/uaccess.h>
+
+#define IPIPE_TRACE_PATHS           4 /* <!> Do not lower below 3 */
+#define IPIPE_DEFAULT_ACTIVE        0
+#define IPIPE_DEFAULT_MAX           1
+#define IPIPE_DEFAULT_FROZEN        2
+
+#define IPIPE_TRACE_POINTS          (1 << CONFIG_IPIPE_TRACE_SHIFT)
+#define WRAP_POINT_NO(point)        ((point) & (IPIPE_TRACE_POINTS-1))
+
+#define IPIPE_DEFAULT_PRE_TRACE     10
+#define IPIPE_DEFAULT_POST_TRACE    10
+#define IPIPE_DEFAULT_BACK_TRACE    100
+
+#define IPIPE_DELAY_NOTE            1000  /* in nanoseconds */
+#define IPIPE_DELAY_WARN            10000 /* in nanoseconds */
+
+#define IPIPE_TFLG_NMI_LOCK         0x0001
+#define IPIPE_TFLG_NMI_HIT          0x0002
+#define IPIPE_TFLG_NMI_FREEZE_REQ   0x0004
+
+#define IPIPE_TFLG_HWIRQ_OFF        0x0100
+#define IPIPE_TFLG_FREEZING         0x0200
+#define IPIPE_TFLG_CURRDOM_SHIFT    10   /* bits 10..11: current domain */
+#define IPIPE_TFLG_CURRDOM_MASK     0x0C00
+#define IPIPE_TFLG_DOMSTATE_SHIFT   12   /* bits 12..15: domain stalled? */
+#define IPIPE_TFLG_DOMSTATE_BITS    3
+
+#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \
+	(point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT)))
+#define IPIPE_TFLG_CURRENT_DOMAIN(point) \
+	((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT)
+
+
+struct ipipe_trace_point{
+	short type;
+	short flags;
+	unsigned long eip;
+	unsigned long parent_eip;
+	unsigned long v;
+	unsigned long long timestamp;
+};
+
+struct ipipe_trace_path{
+	volatile int flags;
+	int dump_lock; /* separated from flags due to cross-cpu access */
+	int trace_pos; /* next point to fill */
+	int begin, end; /* finalised path begin and end */
+	int post_trace; /* non-zero when in post-trace phase */
+	unsigned long long length; /* max path length in cycles */
+	unsigned long nmi_saved_eip; /* for deferred requests from NMIs */
+	unsigned long nmi_saved_parent_eip;
+	unsigned long nmi_saved_v;
+	struct ipipe_trace_point point[IPIPE_TRACE_POINTS];
+} ____cacheline_aligned_in_smp;
+
+enum ipipe_trace_type
+{
+	IPIPE_TRACE_FUNC = 0,
+	IPIPE_TRACE_BEGIN,
+	IPIPE_TRACE_END,
+	IPIPE_TRACE_FREEZE,
+	IPIPE_TRACE_SPECIAL,
+	IPIPE_TRACE_PID,
+};
+
+#define IPIPE_TYPE_MASK             0x0007
+#define IPIPE_TYPE_BITS             3
+
+
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
+
+static struct ipipe_trace_path *trace_paths[NR_CPUS];
+
+#else /* !CONFIG_IPIPE_TRACE_VMALLOC */
+
+static struct ipipe_trace_path trace_paths[NR_CPUS][IPIPE_TRACE_PATHS] =
+	{ [0 ... NR_CPUS-1] =
+		{ [0 ... IPIPE_TRACE_PATHS-1] =
+			{ .begin = -1, .end = -1 }
+		}
+	};
+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
+
+int ipipe_trace_enable = 0;
+
+static int active_path[NR_CPUS] =
+	{ [0 ... NR_CPUS-1] = IPIPE_DEFAULT_ACTIVE };
+static int max_path[NR_CPUS] =
+	{ [0 ... NR_CPUS-1] = IPIPE_DEFAULT_MAX };
+static int frozen_path[NR_CPUS] =
+	{ [0 ... NR_CPUS-1] = IPIPE_DEFAULT_FROZEN };
+static IPIPE_DEFINE_SPINLOCK(global_path_lock);
+static int pre_trace = IPIPE_DEFAULT_PRE_TRACE;
+static int post_trace = IPIPE_DEFAULT_POST_TRACE;
+static int back_trace = IPIPE_DEFAULT_BACK_TRACE;
+static int verbose_trace = 1;
+static unsigned long trace_overhead;
+
+static unsigned long trigger_begin;
+static unsigned long trigger_end;
+
+static DEFINE_MUTEX(out_mutex);
+static struct ipipe_trace_path *print_path;
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+static struct ipipe_trace_path *panic_path;
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
+static int print_pre_trace;
+static int print_post_trace;
+
+
+static long __ipipe_signed_tsc2us(long long tsc);
+static void
+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point);
+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip);
+
+
+static notrace void
+__ipipe_store_domain_states(struct ipipe_trace_point *point)
+{
+	struct ipipe_domain *ipd;
+	struct list_head *pos;
+	int i = 0;
+
+	list_for_each_prev(pos, &__ipipe_pipeline) {
+		ipd = list_entry(pos, struct ipipe_domain, p_link);
+
+		if (test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)))
+			point->flags |= 1 << (i + IPIPE_TFLG_DOMSTATE_SHIFT);
+
+		if (ipd == ipipe_current_domain)
+			point->flags |= i << IPIPE_TFLG_CURRDOM_SHIFT;
+
+		if (++i > IPIPE_TFLG_DOMSTATE_BITS)
+			break;
+	}
+}
+
+static notrace int __ipipe_get_free_trace_path(int old, int cpu_id)
+{
+	int new_active = old;
+	struct ipipe_trace_path *tp;
+
+	do {
+		if (++new_active == IPIPE_TRACE_PATHS)
+			new_active = 0;
+		tp = &trace_paths[cpu_id][new_active];
+	} while ((new_active == max_path[cpu_id]) ||
+	         (new_active == frozen_path[cpu_id]) ||
+	         tp->dump_lock);
+
+	return new_active;
+}
+
+static notrace void
+__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp,
+                          struct ipipe_trace_path *old_tp, int old_pos)
+{
+	int i;
+
+	new_tp->trace_pos = pre_trace+1;
+
+	for (i = new_tp->trace_pos; i > 0; i--)
+		memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)],
+		       &old_tp->point[WRAP_POINT_NO(old_pos-i)],
+		       sizeof(struct ipipe_trace_point));
+
+	/* mark the end (i.e. the point before point[0]) invalid */
+	new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0;
+}
+
+static notrace struct ipipe_trace_path *
+__ipipe_trace_end(int cpu_id, struct ipipe_trace_path *tp, int pos)
+{
+	struct ipipe_trace_path *old_tp = tp;
+	long active = active_path[cpu_id];
+	unsigned long long length;
+
+	/* do we have a new worst case? */
+	length = tp->point[tp->end].timestamp -
+	         tp->point[tp->begin].timestamp;
+	if (length > (trace_paths[cpu_id][max_path[cpu_id]]).length) {
+		/* we need protection here against other cpus trying
+		   to start a proc dump */
+		spin_lock(&global_path_lock);
+
+		/* active path holds new worst case */
+		tp->length = length;
+		max_path[cpu_id] = active;
+
+		/* find next unused trace path */
+		active = __ipipe_get_free_trace_path(active, cpu_id);
+
+		spin_unlock(&global_path_lock);
+
+		tp = &trace_paths[cpu_id][active];
+
+		/* migrate last entries for pre-tracing */
+		__ipipe_migrate_pre_trace(tp, old_tp, pos);
+	}
+
+	return tp;
+}
+
+static notrace struct ipipe_trace_path *
+__ipipe_trace_freeze(int cpu_id, struct ipipe_trace_path *tp, int pos)
+{
+	struct ipipe_trace_path *old_tp = tp;
+	long active = active_path[cpu_id];
+	int i;
+
+	/* frozen paths have no core (begin=end) */
+	tp->begin = tp->end;
+
+	/* we need protection here against other cpus trying
+	 * to set their frozen path or to start a proc dump */
+	spin_lock(&global_path_lock);
+
+	frozen_path[cpu_id] = active;
+
+	/* find next unused trace path */
+	active = __ipipe_get_free_trace_path(active, cpu_id);
+
+	/* check if this is the first frozen path */
+	for (i = 0; i < NR_CPUS; i++) {
+		if ((i != cpu_id) &&
+		    (trace_paths[i][frozen_path[i]].end >= 0))
+			tp->end = -1;
+	}
+
+	spin_unlock(&global_path_lock);
+
+	tp = &trace_paths[cpu_id][active];
+
+	/* migrate last entries for pre-tracing */
+	__ipipe_migrate_pre_trace(tp, old_tp, pos);
+
+	return tp;
+}
+
+void notrace
+__ipipe_trace(enum ipipe_trace_type type, unsigned long eip,
+              unsigned long parent_eip, unsigned long v)
+{
+	struct ipipe_trace_path *tp, *old_tp;
+	int pos, next_pos, begin;
+	struct ipipe_trace_point *point;
+	unsigned long flags;
+	int cpu_id;
+
+	local_irq_save_hw_notrace(flags);
+
+	cpu_id = ipipe_processor_id();
+ restart:
+	tp = old_tp = &trace_paths[cpu_id][active_path[cpu_id]];
+
+	/* here starts a race window with NMIs - catched below */
+
+	/* check for NMI recursion */
+	if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) {
+		tp->flags |= IPIPE_TFLG_NMI_HIT;
+
+		/* first freeze request from NMI context? */
+		if ((type == IPIPE_TRACE_FREEZE) &&
+		    !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) {
+			/* save arguments and mark deferred freezing */
+			tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ;
+			tp->nmi_saved_eip = eip;
+			tp->nmi_saved_parent_eip = parent_eip;
+			tp->nmi_saved_v = v;
+		}
+		return; /* no need for restoring flags inside IRQ */
+	}
+
+	/* clear NMI events and set lock (atomically per cpu) */
+	tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT |
+	                           IPIPE_TFLG_NMI_FREEZE_REQ))
+	                       | IPIPE_TFLG_NMI_LOCK;
+
+	/* check active_path again - some nasty NMI may have switched
+	 * it meanwhile */
+	if (unlikely(tp != &trace_paths[cpu_id][active_path[cpu_id]])) {
+		/* release lock on wrong path and restart */
+		tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+		/* there is no chance that the NMI got deferred
+		 * => no need to check for pending freeze requests */
+		goto restart;
+	}
+
+	/* get the point buffer */
+	pos = tp->trace_pos;
+	point = &tp->point[pos];
+
+	/* store all trace point data */
+	point->type = type;
+	point->flags = raw_irqs_disabled_flags(flags) ? IPIPE_TFLG_HWIRQ_OFF : 0;
+	point->eip = eip;
+	point->parent_eip = parent_eip;
+	point->v = v;
+	ipipe_read_tsc(point->timestamp);
+
+	__ipipe_store_domain_states(point);
+
+	/* forward to next point buffer */
+	next_pos = WRAP_POINT_NO(pos+1);
+	tp->trace_pos = next_pos;
+
+	/* only mark beginning if we haven't started yet */
+	begin = tp->begin;
+	if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0))
+		tp->begin = pos;
+
+	/* end of critical path, start post-trace if not already started */
+	if (unlikely(type == IPIPE_TRACE_END) &&
+	    (begin >= 0) && !tp->post_trace)
+		tp->post_trace = post_trace + 1;
+
+	/* freeze only if the slot is free and we are not already freezing */
+	if ((unlikely(type == IPIPE_TRACE_FREEZE) ||
+	     (unlikely(eip >= trigger_begin && eip <= trigger_end) &&
+	     type == IPIPE_TRACE_FUNC)) &&
+	    (trace_paths[cpu_id][frozen_path[cpu_id]].begin < 0) &&
+	    !(tp->flags & IPIPE_TFLG_FREEZING)) {
+		tp->post_trace = post_trace + 1;
+		tp->flags |= IPIPE_TFLG_FREEZING;
+	}
+
+	/* enforce end of trace in case of overflow */
+	if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) {
+		tp->end = pos;
+		goto enforce_end;
+	}
+
+	/* stop tracing this path if we are in post-trace and
+	 *  a) that phase is over now or
+	 *  b) a new TRACE_BEGIN came in but we are not freezing this path */
+	if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) ||
+	             ((type == IPIPE_TRACE_BEGIN) &&
+	              !(tp->flags & IPIPE_TFLG_FREEZING))))) {
+		/* store the path's end (i.e. excluding post-trace) */
+		tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace);
+
+ enforce_end:
+		if (tp->flags & IPIPE_TFLG_FREEZING)
+			tp = __ipipe_trace_freeze(cpu_id, tp, pos);
+		else
+			tp = __ipipe_trace_end(cpu_id, tp, pos);
+
+		/* reset the active path, maybe already start a new one */
+		tp->begin = (type == IPIPE_TRACE_BEGIN) ?
+			WRAP_POINT_NO(tp->trace_pos - 1) : -1;
+		tp->end = -1;
+		tp->post_trace = 0;
+		tp->flags = 0;
+
+		/* update active_path not earlier to avoid races with NMIs */
+		active_path[cpu_id] = tp - trace_paths[cpu_id];
+	}
+
+	/* we still have old_tp and point,
+	 * let's reset NMI lock and check for catches */
+	old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+	if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) {
+		/* well, this late tagging may not immediately be visible for
+		 * other cpus already dumping this path - a minor issue */
+		point->flags |= IPIPE_TFLG_NMI_HIT;
+
+		/* handle deferred freezing from NMI context */
+		if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
+			__ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip,
+			              old_tp->nmi_saved_parent_eip,
+			              old_tp->nmi_saved_v);
+	}
+
+	local_irq_restore_hw_notrace(flags);
+}
+
+static unsigned long __ipipe_global_path_lock(void)
+{
+	unsigned long flags;
+	int cpu_id;
+	struct ipipe_trace_path *tp;
+
+	spin_lock_irqsave(&global_path_lock, flags);
+
+	cpu_id = ipipe_processor_id();
+ restart:
+	tp = &trace_paths[cpu_id][active_path[cpu_id]];
+
+	/* here is small race window with NMIs - catched below */
+
+	/* clear NMI events and set lock (atomically per cpu) */
+	tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT |
+	                           IPIPE_TFLG_NMI_FREEZE_REQ))
+	                       | IPIPE_TFLG_NMI_LOCK;
+
+	/* check active_path again - some nasty NMI may have switched
+	 * it meanwhile */
+	if (tp != &trace_paths[cpu_id][active_path[cpu_id]]) {
+		/* release lock on wrong path and restart */
+		tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+		/* there is no chance that the NMI got deferred
+		 * => no need to check for pending freeze requests */
+		goto restart;
+	}
+
+	return flags;
+}
+
+static void __ipipe_global_path_unlock(unsigned long flags)
+{
+	int cpu_id;
+	struct ipipe_trace_path *tp;
+
+	/* release spinlock first - it's not involved in the NMI issue */
+	__ipipe_spin_unlock_irqbegin(&global_path_lock);
+
+	cpu_id = ipipe_processor_id();
+	tp = &trace_paths[cpu_id][active_path[cpu_id]];
+
+	tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+	/* handle deferred freezing from NMI context */
+	if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
+		__ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip,
+		              tp->nmi_saved_parent_eip, tp->nmi_saved_v);
+
+	/* See __ipipe_spin_lock_irqsave() and friends. */
+	__ipipe_spin_unlock_irqcomplete(flags);
+}
+
+void notrace ipipe_trace_begin(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_BEGIN, __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_begin);
+
+void notrace ipipe_trace_end(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_END, __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_end);
+
+void notrace ipipe_trace_freeze(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_FREEZE, __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_freeze);
+
+void notrace ipipe_trace_special(unsigned char id, unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS),
+	              __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_special);
+
+void notrace ipipe_trace_pid(pid_t pid, short prio)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS),
+	              __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, pid);
+}
+EXPORT_SYMBOL(ipipe_trace_pid);
+
+int ipipe_trace_max_reset(void)
+{
+	int cpu_id;
+	unsigned long flags;
+	struct ipipe_trace_path *path;
+	int ret = 0;
+
+	flags = __ipipe_global_path_lock();
+
+	for (cpu_id = 0; cpu_id < NR_CPUS; cpu_id++) {
+		path = &trace_paths[cpu_id][max_path[cpu_id]];
+
+		if (path->dump_lock) {
+			ret = -EBUSY;
+			break;
+		}
+
+		path->begin     = -1;
+		path->end       = -1;
+		path->trace_pos = 0;
+		path->length    = 0;
+	}
+
+	__ipipe_global_path_unlock(flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(ipipe_trace_max_reset);
+
+int ipipe_trace_frozen_reset(void)
+{
+	int cpu_id;
+	unsigned long flags;
+	struct ipipe_trace_path *path;
+	int ret = 0;
+
+	flags = __ipipe_global_path_lock();
+
+	for_each_online_cpu(cpu_id) {
+		path = &trace_paths[cpu_id][frozen_path[cpu_id]];
+
+		if (path->dump_lock) {
+			ret = -EBUSY;
+			break;
+		}
+
+		path->begin = -1;
+		path->end = -1;
+		path->trace_pos = 0;
+		path->length    = 0;
+	}
+
+	__ipipe_global_path_unlock(flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(ipipe_trace_frozen_reset);
+
+static void
+__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point,
+                      int trylock)
+{
+	struct task_struct *task = NULL;
+	char buf[8];
+	int i;
+	int locked = 1;
+
+	if (trylock) {
+		if (!read_trylock(&tasklist_lock))
+			locked = 0;
+	} else
+		read_lock(&tasklist_lock);
+
+	if (locked)
+		task = find_task_by_pid((pid_t)point->v);
+
+	if (task)
+		strncpy(task_info, task->comm, 11);
+	else
+		strcpy(task_info, "-<?>-");
+
+	if (locked)
+		read_unlock(&tasklist_lock);
+
+	for (i = strlen(task_info); i < 11; i++)
+		task_info[i] = ' ';
+
+	sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS);
+	strcpy(task_info + (11 - strlen(buf)), buf);
+}
+
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+void ipipe_trace_panic_freeze(void)
+{
+	unsigned long flags;
+	int cpu_id;
+
+	if (!ipipe_trace_enable)
+		return;
+
+	ipipe_trace_enable = 0;
+	local_irq_save_hw_notrace(flags);
+
+	cpu_id = ipipe_processor_id();
+
+	panic_path = &trace_paths[cpu_id][active_path[cpu_id]];
+
+	local_irq_restore_hw(flags);
+}
+EXPORT_SYMBOL(ipipe_trace_panic_freeze);
+
+void ipipe_trace_panic_dump(void)
+{
+	int cnt = back_trace;
+	int start, pos;
+	char task_info[12];
+
+	if (!panic_path)
+		return;
+
+	ipipe_context_check_off();
+
+	printk("I-pipe tracer log (%d points):\n", cnt);
+
+	start = pos = WRAP_POINT_NO(panic_path->trace_pos-1);
+
+	while (cnt-- > 0) {
+		struct ipipe_trace_point *point = &panic_path->point[pos];
+		long time;
+		char buf[16];
+		int i;
+
+		printk(" %c",
+		       (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' ');
+
+		for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--)
+			printk("%c",
+			       (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ?
+				(IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
+					'#' : '+') :
+				(IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
+					'*' : ' '));
+
+		if (!point->eip)
+			printk("-<invalid>-\n");
+		else {
+			__ipipe_trace_point_type(buf, point);
+			printk(buf);
+
+			switch (point->type & IPIPE_TYPE_MASK) {
+				case IPIPE_TRACE_FUNC:
+					printk("           ");
+					break;
+
+				case IPIPE_TRACE_PID:
+					__ipipe_get_task_info(task_info,
+							      point, 1);
+					printk(task_info);
+					break;
+
+				default:
+					printk("0x%08lx ", point->v);
+			}
+
+			time = __ipipe_signed_tsc2us(point->timestamp -
+				panic_path->point[start].timestamp);
+			printk(" %5ld ", time);
+
+			__ipipe_print_symname(NULL, point->eip);
+			printk(" (");
+			__ipipe_print_symname(NULL, point->parent_eip);
+			printk(")\n");
+		}
+		pos = WRAP_POINT_NO(pos - 1);
+	}
+
+	panic_path = NULL;
+}
+EXPORT_SYMBOL(ipipe_trace_panic_dump);
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
+
+
+/* --- /proc output --- */
+
+static notrace int __ipipe_in_critical_trpath(long point_no)
+{
+	return ((WRAP_POINT_NO(point_no-print_path->begin) <
+	         WRAP_POINT_NO(print_path->end-print_path->begin)) ||
+	        ((print_path->end == print_path->begin) &&
+	         (WRAP_POINT_NO(point_no-print_path->end) >
+	          print_post_trace)));
+}
+
+static long __ipipe_signed_tsc2us(long long tsc)
+{
+        unsigned long long abs_tsc;
+        long us;
+
+	/* ipipe_tsc2us works on unsigned => handle sign separately */
+        abs_tsc = (tsc >= 0) ? tsc : -tsc;
+        us = ipipe_tsc2us(abs_tsc);
+        if (tsc < 0)
+                return -us;
+        else
+                return us;
+}
+
+static void
+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point)
+{
+	switch (point->type & IPIPE_TYPE_MASK) {
+		case IPIPE_TRACE_FUNC:
+			strcpy(buf, "func    ");
+			break;
+
+		case IPIPE_TRACE_BEGIN:
+			strcpy(buf, "begin   ");
+			break;
+
+		case IPIPE_TRACE_END:
+			strcpy(buf, "end     ");
+			break;
+
+		case IPIPE_TRACE_FREEZE:
+			strcpy(buf, "freeze  ");
+			break;
+
+		case IPIPE_TRACE_SPECIAL:
+			sprintf(buf, "(0x%02x)  ",
+				point->type >> IPIPE_TYPE_BITS);
+			break;
+
+		case IPIPE_TRACE_PID:
+			sprintf(buf, "[%5d] ", (pid_t)point->v);
+			break;
+	}
+}
+
+static void
+__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point)
+{
+	char mark = ' ';
+	int point_no = point - print_path->point;
+	int i;
+
+	if (print_path->end == point_no)
+		mark = '<';
+	else if (print_path->begin == point_no)
+		mark = '>';
+	else if (__ipipe_in_critical_trpath(point_no))
+		mark = ':';
+	seq_printf(m, "%c%c", mark,
+	           (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' ');
+
+	if (!verbose_trace)
+		return;
+
+	for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--)
+		seq_printf(m, "%c",
+			(IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ?
+			    (IPIPE_TFLG_DOMAIN_STALLED(point, i) ?
+				'#' : '+') :
+			(IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' '));
+}
+
+static void
+__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point)
+{
+	unsigned long delay = 0;
+	int next;
+	char *mark = "  ";
+
+	next = WRAP_POINT_NO(point+1 - print_path->point);
+
+	if (next != print_path->trace_pos)
+		delay = ipipe_tsc2ns(print_path->point[next].timestamp -
+		                     point->timestamp);
+
+	if (__ipipe_in_critical_trpath(point - print_path->point)) {
+		if (delay > IPIPE_DELAY_WARN)
+			mark = "! ";
+		else if (delay > IPIPE_DELAY_NOTE)
+			mark = "+ ";
+	}
+	seq_puts(m, mark);
+
+	if (verbose_trace)
+		seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000,
+		           (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' ');
+	else
+		seq_puts(m, " ");
+}
+
+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip)
+{
+	char namebuf[KSYM_NAME_LEN+1];
+	unsigned long size, offset;
+	const char *sym_name;
+	char *modname;
+
+	sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf);
+
+#ifdef CONFIG_IPIPE_TRACE_PANIC
+	if (!m) {
+		/* panic dump */
+		if (sym_name) {
+			printk("%s+0x%lx", sym_name, offset);
+			if (modname)
+				printk(" [%s]", modname);
+		}
+	} else
+#endif /* CONFIG_IPIPE_TRACE_PANIC */
+	{
+		if (sym_name) {
+			if (verbose_trace) {
+				seq_printf(m, "%s+0x%lx", sym_name, offset);
+				if (modname)
+					seq_printf(m, " [%s]", modname);
+			} else
+				seq_puts(m, sym_name);
+		} else
+			seq_printf(m, "<%08lx>", eip);
+	}
+}
+
+static void __ipipe_print_headline(struct seq_file *m)
+{
+	seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu "
+		   "us\n\n", trace_overhead/1000, trace_overhead%1000);
+
+	if (verbose_trace) {
+		const char *name[4] = { [0 ... 3] = "<unused>" };
+		struct list_head *pos;
+		int i = 0;
+
+		list_for_each_prev(pos, &__ipipe_pipeline) {
+			struct ipipe_domain *ipd =
+				list_entry(pos, struct ipipe_domain, p_link);
+
+			name[i] = ipd->name;
+			if (++i > 3)
+				break;
+		}
+
+		seq_printf(m,
+		           " +----- Hard IRQs ('|': locked)\n"
+		           " |+---- %s\n"
+		           " ||+--- %s\n"
+		           " |||+-- %s\n"
+		           " ||||+- %s%s\n"
+		           " |||||                        +---------- "
+		               "Delay flag ('+': > %d us, '!': > %d us)\n"
+		           " |||||                        |        +- "
+		               "NMI noise ('N')\n"
+		           " |||||                        |        |\n"
+		           "      Type    User Val.   Time    Delay  Function "
+		               "(Parent)\n",
+		           name[3], name[2], name[1], name[0],
+		           name[0] ? " ('*': domain stalled, '+': current, "
+		               "'#': current+stalled)" : "",
+		           IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000);
+	} else
+		seq_printf(m,
+		           " +--------------- Hard IRQs ('|': locked)\n"
+		           " |             +- Delay flag "
+		               "('+': > %d us, '!': > %d us)\n"
+		           " |             |\n"
+		           "  Type     Time   Function (Parent)\n",
+		           IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000);
+}
+
+static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t n = *pos;
+
+	mutex_lock(&out_mutex);
+
+	if (!n) {
+		struct ipipe_trace_path *path;
+		unsigned long length_usecs;
+		int points, cpu;
+		unsigned long flags;
+
+		/* protect against max_path/frozen_path updates while we
+		 * haven't locked our target path, also avoid recursively
+		 * taking global_path_lock from NMI context */
+		flags = __ipipe_global_path_lock();
+
+		/* find the longest of all per-cpu paths */
+		print_path = NULL;
+		for_each_online_cpu(cpu) {
+			path = &trace_paths[cpu][max_path[cpu]];
+			if ((print_path == NULL) ||
+			    (path->length > print_path->length)) {
+				print_path = path;
+				break;
+			}
+		}
+		print_path->dump_lock = 1;
+
+		__ipipe_global_path_unlock(flags);
+
+		/* does this path actually contain data? */
+		if (print_path->end == print_path->begin)
+			return NULL;
+
+		/* number of points inside the critical path */
+		points = WRAP_POINT_NO(print_path->end-print_path->begin+1);
+
+		/* pre- and post-tracing length, post-trace length was frozen
+		   in __ipipe_trace, pre-trace may have to be reduced due to
+		   buffer overrun */
+		print_pre_trace  = pre_trace;
+		print_post_trace = WRAP_POINT_NO(print_path->trace_pos -
+		                                 print_path->end - 1);
+		if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1)
+			print_pre_trace = IPIPE_TRACE_POINTS - 1 - points -
+				print_post_trace;
+
+		length_usecs = ipipe_tsc2us(print_path->length);
+		seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe-%s\n"
+			"------------------------------------------------------------\n",
+			UTS_RELEASE, IPIPE_ARCH_STRING);
+		seq_printf(m, "CPU: %d, Begin: %lld cycles, Trace Points: "
+			"%d (-%d/+%d), Length: %lu us\n",
+			cpu, print_path->point[print_path->begin].timestamp,
+			points, print_pre_trace, print_post_trace, length_usecs);
+		__ipipe_print_headline(m);
+	}
+
+	/* check if we are inside the trace range */
+	if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 +
+	                       print_pre_trace + print_post_trace))
+		return NULL;
+
+	/* return the next point to be shown */
+	return &print_path->point[WRAP_POINT_NO(print_path->begin -
+	                                        print_pre_trace + n)];
+}
+
+static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	loff_t n = ++*pos;
+
+	/* check if we are inside the trace range with the next entry */
+	if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 +
+	                       print_pre_trace + print_post_trace))
+		return NULL;
+
+	/* return the next point to be shown */
+	return &print_path->point[WRAP_POINT_NO(print_path->begin -
+	                                        print_pre_trace + *pos)];
+}
+
+static void __ipipe_prtrace_stop(struct seq_file *m, void *p)
+{
+	if (print_path)
+		print_path->dump_lock = 0;
+	mutex_unlock(&out_mutex);
+}
+
+static int __ipipe_prtrace_show(struct seq_file *m, void *p)
+{
+	long time;
+	struct ipipe_trace_point *point = p;
+	char buf[16];
+
+	if (!point->eip) {
+		seq_puts(m, "-<invalid>-\n");
+		return 0;
+	}
+
+	__ipipe_print_pathmark(m, point);
+	__ipipe_trace_point_type(buf, point);
+	seq_puts(m, buf);
+	if (verbose_trace)
+		switch (point->type & IPIPE_TYPE_MASK) {
+			case IPIPE_TRACE_FUNC:
+				seq_puts(m, "           ");
+				break;
+
+			case IPIPE_TRACE_PID:
+				__ipipe_get_task_info(buf, point, 0);
+				seq_puts(m, buf);
+				break;
+
+			default:
+				seq_printf(m, "0x%08lx ", point->v);
+		}
+
+	time = __ipipe_signed_tsc2us(point->timestamp -
+		print_path->point[print_path->begin].timestamp);
+	seq_printf(m, "%5ld", time);
+
+	__ipipe_print_delay(m, point);
+	__ipipe_print_symname(m, point->eip);
+	seq_puts(m, " (");
+	__ipipe_print_symname(m, point->parent_eip);
+	seq_puts(m, ")\n");
+
+	return 0;
+}
+
+static struct seq_operations __ipipe_max_ptrace_ops = {
+	.start = __ipipe_max_prtrace_start,
+	.next  = __ipipe_prtrace_next,
+	.stop  = __ipipe_prtrace_stop,
+	.show  = __ipipe_prtrace_show
+};
+
+static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &__ipipe_max_ptrace_ops);
+}
+
+static ssize_t
+__ipipe_max_reset(struct file *file, const char __user *pbuffer,
+                  size_t count, loff_t *data)
+{
+	mutex_lock(&out_mutex);
+	ipipe_trace_max_reset();
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+struct file_operations __ipipe_max_prtrace_fops = {
+	.open       = __ipipe_max_prtrace_open,
+	.read       = seq_read,
+	.write      = __ipipe_max_reset,
+	.llseek     = seq_lseek,
+	.release    = seq_release,
+};
+
+static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t n = *pos;
+
+	mutex_lock(&out_mutex);
+
+	if (!n) {
+		struct ipipe_trace_path *path;
+		int cpu;
+		unsigned long flags;
+
+		/* protect against max_path/frozen_path updates while we
+		 * haven't locked our target path, also avoid recursively
+		 * taking global_path_lock from NMI context */
+		flags = __ipipe_global_path_lock();
+
+		/* find the first of all per-cpu frozen paths */
+		print_path = NULL;
+		for_each_online_cpu(cpu) {
+			path = &trace_paths[cpu][frozen_path[cpu]];
+			if (path->end >= 0) {
+				print_path = path;
+				break;
+			}
+		}
+		if (print_path)
+			print_path->dump_lock = 1;
+
+		__ipipe_global_path_unlock(flags);
+
+		if (!print_path)
+			return NULL;
+
+		/* back- and post-tracing length, post-trace length was frozen
+		   in __ipipe_trace, back-trace may have to be reduced due to
+		   buffer overrun */
+		print_pre_trace  = back_trace-1; /* substract freeze point */
+		print_post_trace = WRAP_POINT_NO(print_path->trace_pos -
+		                                 print_path->end - 1);
+		if (1+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1)
+			print_pre_trace = IPIPE_TRACE_POINTS - 2 -
+				print_post_trace;
+
+		seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe-%s\n"
+			"------------------------------------------------------"
+			"------\n",
+			UTS_RELEASE, IPIPE_ARCH_STRING);
+		seq_printf(m, "CPU: %d, Freeze: %lld cycles, Trace Points: %d (+%d)\n",
+			cpu, print_path->point[print_path->begin].timestamp,
+			print_pre_trace+1, print_post_trace);
+		__ipipe_print_headline(m);
+	}
+
+	/* check if we are inside the trace range */
+	if (n >= print_pre_trace + 1 + print_post_trace)
+		return NULL;
+
+	/* return the next point to be shown */
+	return &print_path->point[WRAP_POINT_NO(print_path->begin-
+	                                        print_pre_trace+n)];
+}
+
+static struct seq_operations __ipipe_frozen_ptrace_ops = {
+	.start = __ipipe_frozen_prtrace_start,
+	.next  = __ipipe_prtrace_next,
+	.stop  = __ipipe_prtrace_stop,
+	.show  = __ipipe_prtrace_show
+};
+
+static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &__ipipe_frozen_ptrace_ops);
+}
+
+static ssize_t
+__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer,
+                    size_t count, loff_t *data)
+{
+	char *end, buf[16];
+	int val;
+	int n;
+
+	n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
+
+	if (copy_from_user(buf, pbuffer, n))
+		return -EFAULT;
+
+	buf[n] = '\0';
+	val = simple_strtol(buf, &end, 0);
+
+	if (((*end != '\0') && !isspace(*end)) || (val < 0))
+		return -EINVAL;
+
+	mutex_lock(&out_mutex);
+	ipipe_trace_frozen_reset();
+	if (val > 0)
+		ipipe_trace_freeze(-1);
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+struct file_operations __ipipe_frozen_prtrace_fops = {
+	.open       = __ipipe_frozen_prtrace_open,
+	.read       = seq_read,
+	.write      = __ipipe_frozen_ctrl,
+	.llseek     = seq_lseek,
+	.release    = seq_release,
+};
+
+static int __ipipe_rd_proc_val(char *page, char **start, off_t off,
+                               int count, int *eof, void *data)
+{
+	int len;
+
+	len = sprintf(page, "%u\n", *(int *)data);
+	len -= off;
+	if (len <= off + count)
+		*eof = 1;
+	*start = page + off;
+	if (len > count)
+		len = count;
+	if (len < 0)
+		len = 0;
+
+	return len;
+}
+
+static int __ipipe_wr_proc_val(struct file *file, const char __user *buffer,
+                               unsigned long count, void *data)
+{
+	char *end, buf[16];
+	int val;
+	int n;
+
+	n = (count > sizeof(buf) - 1) ? sizeof(buf) - 1 : count;
+
+	if (copy_from_user(buf, buffer, n))
+		return -EFAULT;
+
+	buf[n] = '\0';
+	val = simple_strtol(buf, &end, 0);
+
+	if (((*end != '\0') && !isspace(*end)) || (val < 0))
+		return -EINVAL;
+
+	mutex_lock(&out_mutex);
+	*(int *)data = val;
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+static int __ipipe_rd_trigger(char *page, char **start, off_t off, int count,
+			      int *eof, void *data)
+{
+	int len;
+
+	if (!trigger_begin)
+		return 0;
+
+	len = sprint_symbol(page, trigger_begin);
+	page[len++] = '\n';
+
+	len -= off;
+	if (len <= off + count)
+		*eof = 1;
+	*start = page + off;
+	if (len > count)
+		len = count;
+	if (len < 0)
+		len = 0;
+
+	return len;
+}
+
+static int __ipipe_wr_trigger(struct file *file, const char __user *buffer,
+			      unsigned long count, void *data)
+{
+	char buf[KSYM_SYMBOL_LEN];
+	unsigned long begin, end;
+
+	if (count > sizeof(buf) - 1)
+		count = sizeof(buf) - 1;
+	if (copy_from_user(buf, buffer, count) < 0)
+		return -EFAULT;
+	buf[count] = 0;
+	if (buf[count-1] == '\n')
+		buf[count-1] = 0;
+
+	begin = kallsyms_lookup_name(buf);
+	if (!begin || !kallsyms_lookup_size_offset(begin, &end, NULL))
+		return -ENOENT;
+	end += begin - 1;
+
+	mutex_lock(&out_mutex);
+	/* invalidate the current range before setting a new one */
+	trigger_end = 0;
+	wmb();
+	/* set new range */
+	trigger_begin = begin;
+	wmb();
+	trigger_end = end;
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+extern struct proc_dir_entry *ipipe_proc_root;
+
+static void __init
+__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir,
+                              const char *name, int *value_ptr)
+{
+	struct proc_dir_entry *entry;
+
+	entry = create_proc_entry(name, 0644, trace_dir);
+	if (entry) {
+		entry->data = value_ptr;
+		entry->read_proc = __ipipe_rd_proc_val;
+		entry->write_proc = __ipipe_wr_proc_val;
+		entry->owner = THIS_MODULE;
+	}
+}
+
+void __init __ipipe_init_tracer(void)
+{
+	struct proc_dir_entry *trace_dir;
+	struct proc_dir_entry *entry;
+	unsigned long long start, end, min = ULLONG_MAX;
+	int i;
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
+	int cpu, path;
+
+	for_each_possible_cpu(cpu) {
+		trace_paths[cpu] = vmalloc(
+			sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS);
+		if (trace_paths[cpu] == NULL) {
+			printk(KERN_ERR "I-pipe: "
+			       "insufficient memory for trace buffer.\n");
+			return;
+		}
+		memset(trace_paths[cpu], 0,
+			sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS);
+		for (path = 0; path < IPIPE_TRACE_PATHS; path++) {
+			trace_paths[cpu][path].begin = -1;
+			trace_paths[cpu][path].end   = -1;
+		}
+	}
+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
+	ipipe_trace_enable = CONFIG_IPIPE_TRACE_ENABLE_VALUE;
+
+	/* Calculate minimum overhead of __ipipe_trace() */
+	local_irq_disable_hw();
+	for (i = 0; i < 100; i++) {
+		ipipe_read_tsc(start);
+		__ipipe_trace(IPIPE_TRACE_FUNC, __BUILTIN_RETURN_ADDRESS0,
+			      __BUILTIN_RETURN_ADDRESS1, 0);
+		ipipe_read_tsc(end);
+
+		end -= start;
+		if (end < min)
+			min = end;
+	}
+	local_irq_enable_hw();
+	trace_overhead = ipipe_tsc2ns(min);
+
+	trace_dir = create_proc_entry("trace", S_IFDIR, ipipe_proc_root);
+
+	entry = create_proc_entry("max", 0644, trace_dir);
+	if (entry)
+		entry->proc_fops = &__ipipe_max_prtrace_fops;
+
+	entry = create_proc_entry("frozen", 0644, trace_dir);
+	if (entry)
+		entry->proc_fops = &__ipipe_frozen_prtrace_fops;
+
+	entry = create_proc_entry("trigger", 0644, trace_dir);
+	if (entry) {
+		entry->read_proc = __ipipe_rd_trigger;
+		entry->write_proc = __ipipe_wr_trigger;
+		entry->owner = THIS_MODULE;
+	}
+
+	__ipipe_create_trace_proc_val(trace_dir, "pre_trace_points",
+	                              &pre_trace);
+	__ipipe_create_trace_proc_val(trace_dir, "post_trace_points",
+	                              &post_trace);
+	__ipipe_create_trace_proc_val(trace_dir, "back_trace_points",
+	                              &back_trace);
+	__ipipe_create_trace_proc_val(trace_dir, "verbose",
+	                              &verbose_trace);
+	__ipipe_create_trace_proc_val(trace_dir, "enable",
+	                              &ipipe_trace_enable);
+}
Index: linux-2.6.23/kernel/irq/chip.c
===================================================================
--- linux-2.6.23.orig/kernel/irq/chip.c
+++ linux-2.6.23/kernel/irq/chip.c
@@ -340,7 +340,9 @@ handle_level_irq(unsigned int irq, struc
 	irqreturn_t action_ret;
 
 	spin_lock(&desc->lock);
+#ifndef CONFIG_IPIPE
 	mask_ack_irq(desc, irq);
+#endif /* CONFIG_IPIPE */
 
 	if (unlikely(desc->status & IRQ_INPROGRESS))
 		goto out_unlock;
@@ -417,8 +419,13 @@ handle_fasteoi_irq(unsigned int irq, str
 
 	spin_lock(&desc->lock);
 	desc->status &= ~IRQ_INPROGRESS;
+#ifdef CONFIG_IPIPE
+	desc->chip->unmask(irq);
+out:
+#else
 out:
 	desc->chip->eoi(irq);
+#endif
 
 	spin_unlock(&desc->lock);
 }
@@ -462,8 +469,10 @@ handle_edge_irq(unsigned int irq, struct
 
 	kstat_cpu(cpu).irqs[irq]++;
 
+#ifndef CONFIG_IPIPE
 	/* Start handling the irq */
 	desc->chip->ack(irq);
+#endif /* CONFIG_IPIPE */
 
 	/* Mark the IRQ currently in progress.*/
 	desc->status |= IRQ_INPROGRESS;
@@ -503,6 +512,69 @@ out_unlock:
 	spin_unlock(&desc->lock);
 }
 
+#ifdef CONFIG_IPIPE
+
+void fastcall __ipipe_ack_simple_irq(unsigned irq, struct irq_desc *desc)
+{
+}
+
+void fastcall __ipipe_end_simple_irq(unsigned irq, struct irq_desc *desc)
+{
+}
+
+void fastcall __ipipe_ack_level_irq(unsigned irq, struct irq_desc *desc)
+{
+	mask_ack_irq(desc, irq);
+}
+
+void fastcall __ipipe_end_level_irq(unsigned irq, struct irq_desc *desc)
+{
+	if (desc->chip->unmask)
+		desc->chip->unmask(irq);
+}
+
+void fastcall __ipipe_ack_fasteoi_irq(unsigned irq, struct irq_desc *desc)
+{
+	desc->chip->eoi(irq);
+}
+
+void fastcall __ipipe_end_fasteoi_irq(unsigned irq, struct irq_desc *desc)
+{
+	desc->chip->unmask(irq);
+}
+
+void fastcall __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc)
+{
+	desc->chip->ack(irq);
+}
+
+void fastcall __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc)
+{
+}
+
+void fastcall __ipipe_ack_bad_irq(unsigned irq, struct irq_desc *desc)
+{
+	static int done;
+
+	handle_bad_irq(irq, desc);
+
+	if (!done) {
+		printk(KERN_WARNING "%s: unknown flow handler for IRQ %d\n",
+		       __FUNCTION__, irq);
+		done = 1;
+	}
+}
+
+void fastcall __ipipe_noack_irq(unsigned irq, struct irq_desc *desc)
+{
+}
+
+void fastcall __ipipe_noend_irq(unsigned irq, struct irq_desc *desc)
+{
+}
+
+#endif /* CONFIG_IPIPE */
+
 #ifdef CONFIG_SMP
 /**
  *	handle_percpu_IRQ - Per CPU local irq handler
@@ -518,8 +590,10 @@ handle_percpu_irq(unsigned int irq, stru
 
 	kstat_this_cpu.irqs[irq]++;
 
+#ifndef CONFIG_IPIPE
 	if (desc->chip->ack)
 		desc->chip->ack(irq);
+#endif /* CONFIG_IPIPE */
 
 	action_ret = handle_IRQ_event(irq, desc->action);
 	if (!noirqdebug)
@@ -529,6 +603,22 @@ handle_percpu_irq(unsigned int irq, stru
 		desc->chip->eoi(irq);
 }
 
+#ifdef CONFIG_IPIPE
+
+void fastcall __ipipe_ack_percpu_irq(unsigned irq, struct irq_desc *desc)
+{
+	if (desc->chip->ack)
+		desc->chip->ack(irq);
+}
+
+void fastcall __ipipe_end_percpu_irq(unsigned irq, struct irq_desc *desc)
+{
+	if (desc->chip->eoi)
+		desc->chip->eoi(irq);
+}
+
+#endif /* CONFIG_IPIPE */
+
 #endif /* CONFIG_SMP */
 
 void
@@ -548,6 +638,30 @@ __set_irq_handler(unsigned int irq, irq_
 
 	if (!handle)
 		handle = handle_bad_irq;
+#ifdef CONFIG_IPIPE
+	else if (handle == &handle_simple_irq) {
+		desc->ipipe_ack = &__ipipe_ack_simple_irq;
+		desc->ipipe_end = &__ipipe_end_simple_irq;
+	}
+	else if (handle == &handle_level_irq) {
+		desc->ipipe_ack = &__ipipe_ack_level_irq;
+		desc->ipipe_end = &__ipipe_end_level_irq;
+	}
+	else if (handle == &handle_edge_irq) {
+		desc->ipipe_ack = &__ipipe_ack_edge_irq;
+		desc->ipipe_end = &__ipipe_end_edge_irq;
+	}
+	else if (handle == &handle_fasteoi_irq) {
+		desc->ipipe_ack = &__ipipe_ack_fasteoi_irq;
+		desc->ipipe_end = &__ipipe_end_fasteoi_irq;
+	}
+#ifdef CONFIG_SMP
+	else if (handle == &handle_percpu_irq) {
+		desc->ipipe_ack = &__ipipe_ack_percpu_irq;
+		desc->ipipe_end = &__ipipe_end_percpu_irq;
+	}
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_IPIPE */
 	else if (desc->chip == &no_irq_chip) {
 		printk(KERN_WARNING "Trying to install %sinterrupt handler "
 		       "for IRQ%d\n", is_chained ? "chained " : "", irq);
@@ -559,7 +673,17 @@ __set_irq_handler(unsigned int irq, irq_
 		 * dummy_irq_chip for easy transition.
 		 */
 		desc->chip = &dummy_irq_chip;
+#ifdef CONFIG_IPIPE
+		desc->ipipe_ack = &__ipipe_noack_irq;
+		desc->ipipe_end = &__ipipe_noend_irq;
+#endif /* CONFIG_IPIPE */
 	}
+#ifdef CONFIG_IPIPE
+ 	else {
+ 		desc->ipipe_ack = &__ipipe_ack_bad_irq;
+ 		desc->ipipe_end = &__ipipe_noend_irq;
+ 	}
+#endif /* CONFIG_IPIPE */
 
 	spin_lock_irqsave(&desc->lock, flags);
 
@@ -569,6 +693,10 @@ __set_irq_handler(unsigned int irq, irq_
 			mask_ack_irq(desc, irq);
 		desc->status |= IRQ_DISABLED;
 		desc->depth = 1;
+#ifdef CONFIG_IPIPE
+		desc->ipipe_ack = &__ipipe_ack_bad_irq;
+		desc->ipipe_end = &__ipipe_noend_irq;
+#endif /* CONFIG_IPIPE */
 	}
 	desc->handle_irq = handle;
 	desc->name = name;
Index: linux-2.6.23/kernel/power/swsusp.c
===================================================================
--- linux-2.6.23.orig/kernel/power/swsusp.c
+++ linux-2.6.23/kernel/power/swsusp.c
@@ -278,6 +278,7 @@ int swsusp_suspend(void)
 		return error;
 
 	local_irq_disable();
+	local_irq_disable_hw_cond();
 	/* At this point, device_suspend() has been called, but *not*
 	 * device_power_down(). We *must* device_power_down() now.
 	 * Otherwise, drivers for some devices (e.g. interrupt controllers)
@@ -299,6 +300,7 @@ int swsusp_suspend(void)
 	 */
 	device_power_up();
  Enable_irqs:
+	local_irq_enable_hw_cond();
 	local_irq_enable();
 	return error;
 }
@@ -308,6 +310,7 @@ int swsusp_resume(void)
 	int error;
 
 	local_irq_disable();
+	local_irq_disable_hw_cond();
 	/* NOTE:  device_power_down() is just a suspend() with irqs off;
 	 * it has no special "power things down" semantics
 	 */
@@ -334,6 +337,7 @@ int swsusp_resume(void)
 	restore_processor_state();
 	touch_softlockup_watchdog();
 	device_power_up();
+	local_irq_enable_hw_cond();
 	local_irq_enable();
 	return error;
 }
Index: linux-2.6.23/kernel/printk.c
===================================================================
--- linux-2.6.23.orig/kernel/printk.c
+++ linux-2.6.23/kernel/printk.c
@@ -504,6 +504,82 @@ static int have_callable_console(void)
  * printf(3)
  */
 
+#ifdef CONFIG_IPIPE
+
+static ipipe_spinlock_t __ipipe_printk_lock = IPIPE_SPIN_LOCK_UNLOCKED;
+
+static int __ipipe_printk_fill;
+
+static char __ipipe_printk_buf[__LOG_BUF_LEN];
+
+void __ipipe_flush_printk (unsigned virq, void *cookie)
+{
+	char *p = __ipipe_printk_buf;
+	int len, lmax, out = 0;
+	unsigned long flags;
+
+	goto start;
+
+	do {
+		spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
+ start:
+		lmax = __ipipe_printk_fill;
+		while (out < lmax) {
+			len = strlen(p) + 1;
+			printk("%s",p);
+			p += len;
+			out += len;
+		}
+		spin_lock_irqsave(&__ipipe_printk_lock, flags);
+	}
+	while (__ipipe_printk_fill != lmax);
+
+	__ipipe_printk_fill = 0;
+
+	spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
+}
+
+asmlinkage int printk(const char *fmt, ...)
+{
+  	int r, fbytes, oldcount, cs = -1;
+    	unsigned long flags;
+	va_list args;
+
+	va_start(args, fmt);
+
+	if (test_bit(IPIPE_SPRINTK_FLAG,&ipipe_current_domain->flags) ||
+	    oops_in_progress)
+		cs = ipipe_disable_context_check(ipipe_processor_id());
+
+	if (ipipe_current_domain == ipipe_root_domain || cs != -1) {
+		r = vprintk(fmt, args);
+		if (cs != -1)
+			ipipe_restore_context_check(ipipe_processor_id(), cs);
+		goto out;
+	}
+
+	spin_lock_irqsave(&__ipipe_printk_lock, flags);
+
+	oldcount = __ipipe_printk_fill;
+	fbytes = __LOG_BUF_LEN - oldcount;
+
+	if (fbytes > 1)	{
+		r = vscnprintf(__ipipe_printk_buf + __ipipe_printk_fill,
+			       fbytes, fmt, args) + 1; /* account for the null byte */
+		__ipipe_printk_fill += r;
+	} else
+		r = 0;
+
+	spin_unlock_irqrestore(&__ipipe_printk_lock, flags);
+
+	if (oldcount == 0)
+		ipipe_trigger_irq(__ipipe_printk_virq);
+out:
+	va_end(args);
+
+	return r;
+}
+#else /* !CONFIG_IPIPE */
 asmlinkage int printk(const char *fmt, ...)
 {
 	va_list args;
@@ -515,6 +591,7 @@ asmlinkage int printk(const char *fmt, .
 
 	return r;
 }
+#endif /* CONFIG_IPIPE */
 
 /* cpu currently holding logbuf_lock */
 static volatile unsigned int printk_cpu = UINT_MAX;
Index: linux-2.6.23/kernel/sched.c
===================================================================
--- linux-2.6.23.orig/kernel/sched.c
+++ linux-2.6.23/kernel/sched.c
@@ -1436,7 +1436,7 @@ static int try_to_wake_up(struct task_st
 
 	rq = task_rq_lock(p, &flags);
 	old_state = p->state;
-	if (!(old_state & state))
+	if (!(old_state & state) || (old_state & TASK_NOWAKEUP))
 		goto out;
 
 	if (p->se.on_rq)
@@ -1850,13 +1850,15 @@ asmlinkage void schedule_tail(struct tas
 #endif
 	if (current->set_child_tid)
 		put_user(current->pid, current->set_child_tid);
+
+	ipipe_init_notify(current);
 }
 
 /*
  * context_switch - switch to the new MM and the new
  * thread's register state.
  */
-static inline void
+static inline int
 context_switch(struct rq *rq, struct task_struct *prev,
 	       struct task_struct *next)
 {
@@ -1897,12 +1899,17 @@ context_switch(struct rq *rq, struct tas
 	switch_to(prev, next, prev);
 
 	barrier();
+
+	if (task_hijacked(prev))
+		return 1;
 	/*
 	 * this_rq must be evaluated again because prev may have moved
 	 * CPUs since it called schedule(), thus the 'rq' on its stack
 	 * frame will be invalid.
 	 */
 	finish_task_switch(this_rq(), prev);
+
+	return 0;
 }
 
 /*
@@ -3474,6 +3481,8 @@ asmlinkage void __sched schedule(void)
 	struct rq *rq;
 	int cpu;
 
+	ipipe_check_context(ipipe_root_domain);
+
 need_resched:
 	preempt_disable();
 	cpu = smp_processor_id();
@@ -3481,6 +3490,11 @@ need_resched:
 	rcu_qsctr_inc(cpu);
 	prev = rq->curr;
 	switch_count = &prev->nivcsw;
+	if (unlikely(prev->state & TASK_ATOMICSWITCH)) {
+		prev->state &= ~TASK_ATOMICSWITCH;
+		/* Pop one disable level -- one still remains. */
+		preempt_enable();
+	}
 
 	release_kernel_lock(prev);
 need_resched_nonpreemptible:
@@ -3514,7 +3528,8 @@ need_resched_nonpreemptible:
 		rq->curr = next;
 		++*switch_count;
 
-		context_switch(rq, prev, next); /* unlocks the rq */
+		if (context_switch(rq, prev, next)) /* unlocks the rq unless hijacked */
+			return;
 	} else
 		spin_unlock_irq(&rq->lock);
 
@@ -3542,6 +3557,7 @@ asmlinkage void __sched preempt_schedule
 	struct task_struct *task = current;
 	int saved_lock_depth;
 #endif
+	ipipe_check_context(ipipe_root_domain);
 	/*
 	 * If there is a non-zero preempt_count or interrupts are disabled,
 	 * we do not want to preempt the current task.  Just return..
@@ -4265,6 +4281,7 @@ recheck:
 		deactivate_task(rq, p, 0);
 	oldprio = p->prio;
 	__setscheduler(rq, p, policy, param->sched_priority);
+	ipipe_setsched_notify(p);
 	if (on_rq) {
 		activate_task(rq, p, 0);
 		/*
@@ -6739,3 +6756,54 @@ void set_curr_task(int cpu, struct task_
 }
 
 #endif
+
+#ifdef CONFIG_IPIPE
+
+int ipipe_setscheduler_root (struct task_struct *p, int policy, int prio)
+{
+	unsigned long flags;
+	int oldprio, on_rq;
+	struct rq *rq;
+
+	spin_lock_irqsave(&p->pi_lock, flags);
+	rq = __task_rq_lock(p);
+	on_rq = p->se.on_rq;
+	if (on_rq)
+		deactivate_task(rq, p, 0);
+	oldprio = p->prio;
+	__setscheduler(rq, p, policy, prio);
+	ipipe_setsched_notify(p);
+	if (on_rq) {
+		activate_task(rq, p, 0);
+		if (task_running(rq, p)) {
+			if (p->prio > oldprio)
+				resched_task(rq->curr);
+		} else
+			check_preempt_curr(rq, p);
+	}
+	__task_rq_unlock(rq);
+	spin_unlock_irqrestore(&p->pi_lock, flags);
+
+	rt_mutex_adjust_pi(p);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(ipipe_setscheduler_root);
+
+int ipipe_reenter_root (struct task_struct *prev, int policy, int prio)
+{
+	finish_task_switch(this_rq(), prev);
+
+	(void)reacquire_kernel_lock(current);
+	preempt_enable_no_resched();
+
+	if (current->policy != policy || current->rt_priority != prio)
+		return ipipe_setscheduler_root(current, policy, prio);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(ipipe_reenter_root);
+
+#endif /* CONFIG_IPIPE */
Index: linux-2.6.23/kernel/signal.c
===================================================================
--- linux-2.6.23.orig/kernel/signal.c
+++ linux-2.6.23/kernel/signal.c
@@ -455,6 +455,7 @@ void signal_wake_up(struct task_struct *
 	unsigned int mask;
 
 	set_tsk_thread_flag(t, TIF_SIGPENDING);
+	ipipe_sigwake_notify(t); /* TIF_SIGPENDING must be set first. */
 
 	/*
 	 * For SIGKILL, we want to wake it up in the stopped/traced case.
Index: linux-2.6.23/kernel/time/clockevents.c
===================================================================
--- linux-2.6.23.orig/kernel/time/clockevents.c
+++ linux-2.6.23/kernel/time/clockevents.c
@@ -84,6 +84,7 @@ int clockevents_program_event(struct clo
 		return -ETIME;
 
 	dev->next_event = expires;
+	dev->delta = delta;
 
 	if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
 		return 0;
Index: linux-2.6.23/lib/Kconfig.debug
===================================================================
--- linux-2.6.23.orig/lib/Kconfig.debug
+++ linux-2.6.23/lib/Kconfig.debug
@@ -71,6 +71,8 @@ config HEADERS_CHECK
 	  exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in
 	  your build tree), to make sure they're suitable.
 
+source "kernel/ipipe/Kconfig.debug"
+
 config DEBUG_KERNEL
 	bool "Kernel debugging"
 	help
Index: linux-2.6.23/lib/bust_spinlocks.c
===================================================================
--- linux-2.6.23.orig/lib/bust_spinlocks.c
+++ linux-2.6.23/lib/bust_spinlocks.c
@@ -12,16 +12,19 @@
 #include <linux/tty.h>
 #include <linux/wait.h>
 #include <linux/vt_kern.h>
+#include <linux/ipipe_trace.h>
 
 
 void __attribute__((weak)) bust_spinlocks(int yes)
 {
 	if (yes) {
+ 		ipipe_trace_panic_freeze();
 		oops_in_progress = 1;
 	} else {
 #ifdef CONFIG_VT
 		unblank_screen();
 #endif
+		ipipe_trace_panic_dump();
 		oops_in_progress = 0;
 		wake_up_klogd();
 	}
Index: linux-2.6.23/lib/ioremap.c
===================================================================
--- linux-2.6.23.orig/lib/ioremap.c
+++ linux-2.6.23/lib/ioremap.c
@@ -84,8 +84,8 @@ int ioremap_page_range(unsigned long add
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
-
-	flush_cache_vmap(start, end);
+	__ipipe_pin_range_globally(start, end);
+ 	flush_cache_vmap(start, end);
 
 	return err;
 }
Index: linux-2.6.23/lib/smp_processor_id.c
===================================================================
--- linux-2.6.23.orig/lib/smp_processor_id.c
+++ linux-2.6.23/lib/smp_processor_id.c
@@ -13,6 +13,9 @@ unsigned int debug_smp_processor_id(void
 	int this_cpu = raw_smp_processor_id();
 	cpumask_t this_mask;
 
+	if (!ipipe_root_domain_p)
+		goto out;
+
 	if (likely(preempt_count))
 		goto out;
 
Index: linux-2.6.23/lib/spinlock_debug.c
===================================================================
--- linux-2.6.23.orig/lib/spinlock_debug.c
+++ linux-2.6.23/lib/spinlock_debug.c
@@ -133,6 +133,8 @@ void _raw_spin_lock(spinlock_t *lock)
 	debug_spin_lock_after(lock);
 }
 
+EXPORT_SYMBOL(_raw_spin_lock);
+
 int _raw_spin_trylock(spinlock_t *lock)
 {
 	int ret = __raw_spin_trylock(&lock->raw_lock);
@@ -148,12 +150,16 @@ int _raw_spin_trylock(spinlock_t *lock)
 	return ret;
 }
 
+EXPORT_SYMBOL(_raw_spin_trylock);
+
 void _raw_spin_unlock(spinlock_t *lock)
 {
 	debug_spin_unlock(lock);
 	__raw_spin_unlock(&lock->raw_lock);
 }
 
+EXPORT_SYMBOL(_raw_spin_unlock);
+
 static void rwlock_bug(rwlock_t *lock, const char *msg)
 {
 	if (!debug_locks_off())
@@ -199,6 +205,8 @@ void _raw_read_lock(rwlock_t *lock)
 	__raw_read_lock(&lock->raw_lock);
 }
 
+EXPORT_SYMBOL(_raw_read_lock);
+
 int _raw_read_trylock(rwlock_t *lock)
 {
 	int ret = __raw_read_trylock(&lock->raw_lock);
@@ -212,12 +220,16 @@ int _raw_read_trylock(rwlock_t *lock)
 	return ret;
 }
 
+EXPORT_SYMBOL(_raw_read_trylock);
+
 void _raw_read_unlock(rwlock_t *lock)
 {
 	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
 	__raw_read_unlock(&lock->raw_lock);
 }
 
+EXPORT_SYMBOL(_raw_read_unlock);
+
 static inline void debug_write_lock_before(rwlock_t *lock)
 {
 	RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
@@ -275,6 +287,8 @@ void _raw_write_lock(rwlock_t *lock)
 	debug_write_lock_after(lock);
 }
 
+EXPORT_SYMBOL(_raw_write_lock);
+
 int _raw_write_trylock(rwlock_t *lock)
 {
 	int ret = __raw_write_trylock(&lock->raw_lock);
@@ -290,8 +304,12 @@ int _raw_write_trylock(rwlock_t *lock)
 	return ret;
 }
 
+EXPORT_SYMBOL(_raw_write_trylock);
+
 void _raw_write_unlock(rwlock_t *lock)
 {
 	debug_write_unlock(lock);
 	__raw_write_unlock(&lock->raw_lock);
 }
+
+EXPORT_SYMBOL(_raw_write_unlock);
Index: linux-2.6.23/mm/memory.c
===================================================================
--- linux-2.6.23.orig/mm/memory.c
+++ linux-2.6.23/mm/memory.c
@@ -50,6 +50,7 @@
 #include <linux/delayacct.h>
 #include <linux/init.h>
 #include <linux/writeback.h>
+#include <linux/vmalloc.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -416,6 +417,34 @@ struct page *vm_normal_page(struct vm_ar
 	return pfn_to_page(pfn);
 }
 
+static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
+{
+	/*
+	 * If the source page was a PFN mapping, we don't have
+	 * a "struct page" for it. We do a best-effort copy by
+	 * just copying from the original user address. If that
+	 * fails, we just zero-fill it. Live with it.
+	 */
+	if (unlikely(!src)) {
+		void *kaddr = kmap_atomic(dst, KM_USER0);
+		void __user *uaddr = (void __user *)(va & PAGE_MASK);
+
+		/*
+		 * This really shouldn't fail, because the page is there
+		 * in the page tables. But it might just be unreadable,
+		 * in which case we just give up and fill the result with
+		 * zeroes.
+		 */
+		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
+			memset(kaddr, 0, PAGE_SIZE);
+		kunmap_atomic(kaddr, KM_USER0);
+		flush_dcache_page(dst);
+		return;
+		
+	}
+	copy_user_highpage(dst, src, va, vma);
+}
+
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
@@ -424,8 +453,8 @@ struct page *vm_normal_page(struct vm_ar
 
 static inline void
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
-		unsigned long addr, int *rss)
+	     pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
+	     unsigned long addr, int *rss, struct page *uncow_page)
 {
 	unsigned long vm_flags = vma->vm_flags;
 	pte_t pte = *src_pte;
@@ -464,6 +493,21 @@ copy_one_pte(struct mm_struct *dst_mm, s
 	 * in the parent and the child
 	 */
 	if (is_cow_mapping(vm_flags)) {
+#ifdef CONFIG_IPIPE
+		if (uncow_page) {
+			struct page *old_page = vm_normal_page(vma, addr, pte);
+			cow_user_page(uncow_page, old_page, addr, vma);
+			pte = mk_pte(uncow_page, vma->vm_page_prot);
+
+			if (vm_flags & VM_SHARED)
+				pte = pte_mkclean(pte);
+			pte = pte_mkold(pte);
+
+			page_dup_rmap(uncow_page, vma, addr);
+			rss[!!PageAnon(uncow_page)]++;
+			goto out_set_pte;
+		}
+#endif /* CONFIG_IPIPE */
 		ptep_set_wrprotect(src_mm, addr, src_pte);
 		pte = pte_wrprotect(pte);
 	}
@@ -494,13 +538,27 @@ static int copy_pte_range(struct mm_stru
 	pte_t *src_pte, *dst_pte;
 	spinlock_t *src_ptl, *dst_ptl;
 	int progress = 0;
+	struct page *uncow_page = NULL;
 	int rss[2];
-
+#ifdef CONFIG_IPIPE
+	int do_cow_break = 0;
+again:
+	if (do_cow_break) {
+		uncow_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+		if (!uncow_page)
+			return -ENOMEM;
+		do_cow_break = 0;
+	}
+#else
 again:
+#endif
 	rss[1] = rss[0] = 0;
 	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
-	if (!dst_pte)
+	if (!dst_pte) {
+		if (uncow_page)
+			page_cache_release(uncow_page);
 		return -ENOMEM;
+	}
 	src_pte = pte_offset_map_nested(src_pmd, addr);
 	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
@@ -522,7 +580,20 @@ again:
 			progress++;
 			continue;
 		}
-		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
+#ifdef CONFIG_IPIPE
+		if (likely(uncow_page == NULL) && likely(pte_present(*src_pte))) {
+			if (is_cow_mapping(vma->vm_flags)) {
+				if (((vma->vm_flags|src_mm->def_flags) & (VM_LOCKED|VM_PINNED))
+				    == (VM_LOCKED|VM_PINNED)) {
+					do_cow_break = 1;
+					break;
+				}
+			}
+		}
+#endif
+		copy_one_pte(dst_mm, src_mm, dst_pte,
+			     src_pte, vma, addr, rss, uncow_page);
+		uncow_page = NULL;
 		progress += 8;
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 
@@ -1586,34 +1657,6 @@ static inline pte_t maybe_mkwrite(pte_t 
 	return pte;
 }
 
-static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
-{
-	/*
-	 * If the source page was a PFN mapping, we don't have
-	 * a "struct page" for it. We do a best-effort copy by
-	 * just copying from the original user address. If that
-	 * fails, we just zero-fill it. Live with it.
-	 */
-	if (unlikely(!src)) {
-		void *kaddr = kmap_atomic(dst, KM_USER0);
-		void __user *uaddr = (void __user *)(va & PAGE_MASK);
-
-		/*
-		 * This really shouldn't fail, because the page is there
-		 * in the page tables. But it might just be unreadable,
-		 * in which case we just give up and fill the result with
-		 * zeroes.
-		 */
-		if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
-			memset(kaddr, 0, PAGE_SIZE);
-		kunmap_atomic(kaddr, KM_USER0);
-		flush_dcache_page(dst);
-		return;
-
-	}
-	copy_user_highpage(dst, src, va, vma);
-}
-
 /*
  * This routine handles present pages, when users try to write
  * to a shared page. It is done by copying the page to a new address
@@ -2867,3 +2910,104 @@ int access_process_vm(struct task_struct
 	return buf - old_buf;
 }
 EXPORT_SYMBOL_GPL(access_process_vm);
+
+#ifdef CONFIG_IPIPE
+
+static inline int ipipe_pin_pte_range(struct mm_struct *mm, pmd_t *pmd,
+				      struct vm_area_struct *vma,
+				      unsigned long addr, unsigned long end)
+{
+	spinlock_t *ptl;
+	pte_t *pte;
+
+	do {
+		pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+		if (!pte)
+			continue;
+
+		if (!pte_present(*pte)) {
+			pte_unmap_unlock(pte, ptl);
+			continue;
+		}
+
+		if (do_wp_page(mm, vma, addr, pte, pmd, ptl, *pte) == VM_FAULT_OOM)
+			return -ENOMEM;
+	} while (addr += PAGE_SIZE, addr != end);
+	return 0;
+}
+
+static inline int ipipe_pin_pmd_range(struct mm_struct *mm, pud_t *pud,
+				      struct vm_area_struct *vma,
+				      unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	pmd_t *pmd;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (ipipe_pin_pte_range(mm, pmd, vma, addr, end))
+			return -ENOMEM;
+	} while (pmd++, addr = next, addr != end);
+	return 0;
+}
+
+static inline int ipipe_pin_pud_range(struct mm_struct *mm, pgd_t *pgd,
+				      struct vm_area_struct *vma,
+				      unsigned long addr, unsigned long end)
+{
+	unsigned long next;
+	pud_t *pud;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (ipipe_pin_pmd_range(mm, pud, vma, addr, end))
+			return -ENOMEM;
+	} while (pud++, addr = next, addr != end);
+	return 0;
+}
+
+int ipipe_disable_ondemand_mappings(struct task_struct *tsk)
+{
+	unsigned long addr, next, end;
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	int result = 0;
+	pgd_t *pgd;
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return -EPERM;
+
+	down_write(&mm->mmap_sem);
+	if (mm->def_flags & VM_PINNED)
+		goto done_mm;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (!is_cow_mapping(vma->vm_flags))
+			continue;
+
+		addr = vma->vm_start;
+		end = vma->vm_end;
+
+		pgd = pgd_offset(mm, addr);
+		do {
+			next = pgd_addr_end(addr, end);
+			if (ipipe_pin_pud_range(mm, pgd, vma, addr, next)) {
+				result = -ENOMEM;
+				goto done_mm;
+			}
+		} while (pgd++, addr = next, addr != end);
+	}
+	mm->def_flags |= VM_PINNED;
+
+  done_mm:
+	up_write(&mm->mmap_sem);
+	mmput(mm);
+	return result;
+}
+
+EXPORT_SYMBOL(ipipe_disable_ondemand_mappings);
+
+#endif
Index: linux-2.6.23/mm/mlock.c
===================================================================
--- linux-2.6.23.orig/mm/mlock.c
+++ linux-2.6.23/mm/mlock.c
@@ -173,10 +173,10 @@ asmlinkage long sys_munlock(unsigned lon
 static int do_mlockall(int flags)
 {
 	struct vm_area_struct * vma, * prev = NULL;
-	unsigned int def_flags = 0;
+	unsigned int def_flags = current->mm->def_flags & VM_PINNED;
 
 	if (flags & MCL_FUTURE)
-		def_flags = VM_LOCKED;
+		def_flags |= VM_LOCKED;
 	current->mm->def_flags = def_flags;
 	if (flags == MCL_FUTURE)
 		goto out;
Index: linux-2.6.23/mm/vmalloc.c
===================================================================
--- linux-2.6.23.orig/mm/vmalloc.c
+++ linux-2.6.23/mm/vmalloc.c
@@ -161,6 +161,7 @@ int map_vm_area(struct vm_struct *area, 
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
+	__ipipe_pin_range_globally((unsigned long) area->addr, end);
 	flush_cache_vmap((unsigned long) area->addr, end);
 	return err;
 }
Index: linux-2.6.23/kernel/spinlock.c
===================================================================
--- linux-2.6.23.orig/kernel/spinlock.c
+++ linux-2.6.23/kernel/spinlock.c
@@ -88,7 +88,7 @@ unsigned long __lockfunc _spin_lock_irqs
 	 * _raw_spin_lock_flags() code, because lockdep assumes
 	 * that interrupts are not re-enabled during lock-acquire:
 	 */
-#ifdef CONFIG_LOCKDEP
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE)
 	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
 #else
 	_raw_spin_lock_flags(lock, &flags);
@@ -305,7 +305,7 @@ unsigned long __lockfunc _spin_lock_irqs
 	 * _raw_spin_lock_flags() code, because lockdep assumes
 	 * that interrupts are not re-enabled during lock-acquire:
 	 */
-#ifdef CONFIG_LOCKDEP
+#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE)
 	LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
 #else
 	_raw_spin_lock_flags(lock, &flags);
Index: linux-2.6.23/arch/i386/boot/compressed/Makefile
===================================================================
--- linux-2.6.23.orig/arch/i386/boot/compressed/Makefile
+++ linux-2.6.23/arch/i386/boot/compressed/Makefile
@@ -12,6 +12,7 @@ LDFLAGS_vmlinux := -T
 hostprogs-y	:= relocs
 
 CFLAGS  := -m32 -D__KERNEL__ $(LINUX_INCLUDE) -O2 \
+	   -Iinclude/asm-i386/mach-default \
 	   -fno-strict-aliasing -fPIC \
 	   $(call cc-option,-ffreestanding) \
 	   $(call cc-option,-fno-stack-protector)

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 252 bytes --]

                 reply	other threads:[~2007-10-13  9:10 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=47108B7C.4080906@domain.hid \
    --to=jan.kiszka@domain.hid \
    --cc=Xenomai-core@domain.hid \
    --cc=adeos-main@gna.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.