From mboxrd@z Thu Jan 1 00:00:00 1970 Message-ID: <46B85F91.7030105@domain.hid> Date: Tue, 07 Aug 2007 14:03:29 +0200 From: Benjamin ZORES MIME-Version: 1.0 References: <46ADAA32.1000407@domain.hid> <46B03C83.60007@domain.hid> In-Reply-To: <46B03C83.60007@domain.hid> Content-Type: multipart/mixed; boundary="------------000602050800000000030502" Subject: Re: [Xenomai-core] Adeos PowerPC port over 2.6.21 List-Id: "Xenomai life and development \(bug reports, patches, discussions\)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: xenomai-core This is a multi-part message in MIME format. --------------000602050800000000030502 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Benjamin ZORES wrote: > Benjamin ZORES wrote: >> Hi, >> >> I've seen that Adeos has been officially ported to PowerPC architecture. >> Please find an update of the adeos-ipipe-2.6.20-powerpc-1.6-02 to >> Linux 2.6.21 (.5) kernel, in case somebody's interested in it. >> >> Ben >> ------------------------------------------------------------------------ >> >> _______________________________________________ >> Xenomai-core mailing list >> Xenomai-core@domain.hid >> https://mail.gna.org/listinfo/xenomai-core > Hi, > > Attached is updated patch to latest 1.6-03 version. Attached is updated patch to latest Adeos PowerPC 1.6-04 version. 
Regards, Ben --------------000602050800000000030502 Content-Type: text/x-patch; name="adeos-ipipe-2.6.21-powerpc-1.6-04.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="adeos-ipipe-2.6.21-powerpc-1.6-04.patch" diff -Naur linux-2.6.21.5.orig/arch/powerpc/boot/Makefile linux-2.6.21.5/arch/powerpc/boot/Makefile --- linux-2.6.21.5.orig/arch/powerpc/boot/Makefile 2007-08-07 10:55:06.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/boot/Makefile 2007-08-07 10:56:22.000000000 +0200 @@ -27,6 +27,14 @@ $(shell $(CROSS32CC) -print-file-name=include) -fPIC BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc +ifdef CONFIG_IPIPE_TRACE +# do not trace the boot loader +nullstring := +space := $(nullstring) # end of the line +pg_flag = $(nullstring) -pg # end of the line +BOOTCFLAGS := $(subst ${pg_flag},${space},${BOOTCFLAGS}) +endif + ifeq ($(call cc-option-yn, -fstack-protector),y) BOOTCFLAGS += -fno-stack-protector endif diff -Naur linux-2.6.21.5.orig/arch/powerpc/Kconfig linux-2.6.21.5/arch/powerpc/Kconfig --- linux-2.6.21.5.orig/arch/powerpc/Kconfig 2007-08-07 10:55:06.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/Kconfig 2007-08-07 10:56:22.000000000 +0200 @@ -758,6 +758,8 @@ menu "Kernel options" +source "kernel/ipipe/Kconfig" + config HIGHMEM bool "High memory support" depends on PPC32 diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/cputable.c linux-2.6.21.5/arch/powerpc/kernel/cputable.c --- linux-2.6.21.5.orig/arch/powerpc/kernel/cputable.c 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/kernel/cputable.c 2007-08-07 10:56:22.000000000 +0200 @@ -1255,7 +1255,7 @@ #endif /* CONFIG_PPC32 */ }; -struct cpu_spec *identify_cpu(unsigned long offset, unsigned int pvr) +notrace struct cpu_spec *identify_cpu(unsigned long offset, unsigned int pvr) { struct cpu_spec *s = cpu_specs; struct cpu_spec **cur = &cur_cpu_spec; @@ -1284,7 +1284,7 @@ return NULL; } -void do_feature_fixups(unsigned long value, void 
*fixup_start, void *fixup_end) +notrace void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) { struct fixup_entry { unsigned long mask; diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/entry_32.S linux-2.6.21.5/arch/powerpc/kernel/entry_32.S --- linux-2.6.21.5.orig/arch/powerpc/kernel/entry_32.S 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/kernel/entry_32.S 2007-08-07 10:56:22.000000000 +0200 @@ -132,8 +132,23 @@ * check for stack overflow */ lwz r9,THREAD_INFO-THREAD(r12) +#ifdef CONFIG_IPIPE + /* Allow for private kernel-based stacks: those must not cause + the stack overflow detection to trigger when some activity has + been preempted over them. We just check if the kernel stack is + not treading on the memory area ranging from + &current->thread_info to &current->thread, which is coarser + than the vanilla implementation, but likely sensitive enough + to catch overflows soon enough though.*/ + addi r12,r9,THREAD + cmplw 0,r1,r9 + cmplw 1,r1,r12 + crand 1,1,4 + bgt- stack_ovf /* if r9 < r1 < r9+THREAD */ +#else /* CONFIG_IPIPE */ cmplw r1,r9 /* if r1 <= current->thread_info */ ble- stack_ovf /* then the kernel stack overflowed */ +#endif /* CONFIG_IPIPE */ 5: #ifdef CONFIG_6xx tophys(r9,r9) /* check local flags */ @@ -198,6 +213,21 @@ lwz r11,_CCR(r1) /* Clear SO bit in CR */ rlwinm r11,r11,0,4,2 stw r11,_CCR(r1) +#ifdef CONFIG_IPIPE + addi r3,r1,GPR0 + bl __ipipe_syscall_root + cmpwi r3,0 + lwz r3,GPR3(r1) + lwz r0,GPR0(r1) + lwz r4,GPR4(r1) + lwz r5,GPR5(r1) + lwz r6,GPR6(r1) + lwz r7,GPR7(r1) + lwz r8,GPR8(r1) + lwz r9,GPR9(r1) + bgt .ipipe_end_syscall + blt ret_from_syscall +#endif /* CONFIG_IPIPE */ #ifdef SHOW_SYSCALLS bl do_show_syscall #endif /* SHOW_SYSCALLS */ @@ -260,11 +290,34 @@ SYNC RFI +#ifdef CONFIG_IPIPE +.ipipe_end_syscall: + LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ + SYNC + MTMSRD(r10) + b syscall_exit_cont +#endif /* CONFIG_IPIPE */ + 66: li r3,-ENOSYS b ret_from_syscall .globl 
ret_from_fork ret_from_fork: +#ifdef CONFIG_IPIPE +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + stwu r1,-4(r1) + stw r3,0(r1) + lis r3,(0x80000000)@h + ori r3,r3,(0x80000000)@l + bl ipipe_trace_end + lwz r3,0(r1) + addi r1,r1,4 +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ + LOAD_MSR_KERNEL(r10,MSR_KERNEL) + ori r10,r10,MSR_EE + SYNC + MTMSRD(r10) +#endif /* CONFIG_IPIPE */ REST_NVGPRS(r1) bl schedule_tail li r3,0 @@ -630,6 +683,11 @@ SYNC /* Some chip revs have problems here... */ MTMSRD(r10) /* disable interrupts */ +#ifdef CONFIG_IPIPE + bl __ipipe_check_root + cmpwi r3, 0 + beq- restore +#endif /* CONFIG_IPIPE */ lwz r3,_MSR(r1) /* Returning to user mode? */ andi. r0,r3,MSR_PR beq resume_kernel @@ -665,11 +723,37 @@ beq+ restore andi. r0,r3,MSR_EE /* interrupts off? */ beq restore /* don't schedule if so */ +#ifdef CONFIG_IPIPE +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + lis r3,(0x80000000)@h + ori r3,r3,(0x80000000)@l + bl ipipe_trace_end +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ + LOAD_MSR_KERNEL(r10,MSR_KERNEL) + ori r10,r10,MSR_EE + SYNC + MTMSRD(r10) + bl __ipipe_fast_stall_root +#endif /* CONFIG_IPIPE */ 1: bl preempt_schedule_irq rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r3,TI_FLAGS(r9) andi. r0,r3,_TIF_NEED_RESCHED bne- 1b +#ifdef CONFIG_IPIPE + bl __ipipe_fast_unstall_root + LOAD_MSR_KERNEL(r10,MSR_KERNEL) + SYNC + MTMSRD(r10) +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + lwz r3,_MSR(r1) + andi. r0,r3,MSR_EE + bne restore + lis r3,(0x80000000)@h + ori r3,r3,(0x80000000)@l + bl ipipe_trace_begin +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ +#endif /* CONFIG_IPIPE */ #else resume_kernel: #endif /* CONFIG_PREEMPT */ @@ -929,6 +1013,13 @@ .comm ee_restarts,4 +#ifdef CONFIG_IPIPE +_GLOBAL(__ipipe_ret_from_except) + cmpwi r3, 0 + bne+ ret_from_except + b restore +#endif /* CONFIG_IPIPE */ + /* * PROM code for specific machines follows. Put it * here so it's easy to add arch-specific sections later. 
diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/head_32.S linux-2.6.21.5/arch/powerpc/kernel/head_32.S --- linux-2.6.21.5.orig/arch/powerpc/kernel/head_32.S 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/kernel/head_32.S 2007-08-07 10:56:22.000000000 +0200 @@ -329,6 +329,12 @@ EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ ret_from_except_full) +#ifdef CONFIG_IPIPE +#define EXC_XFER_IPIPE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ + __ipipe_ret_from_except) +#endif /* CONFIG_IPIPE */ + #define EXC_XFER_LITE(n, hdlr) \ EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ ret_from_except) @@ -413,7 +419,11 @@ EXC_XFER_EE_LITE(0x400, handle_page_fault) /* External interrupt */ +#ifdef CONFIG_IPIPE + EXCEPTION(0x500, HardwareInterrupt, __ipipe_grab_irq, EXC_XFER_IPIPE) +#else /* !CONFIG_IPIPE */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) +#endif /* CONFIG_IPIPE */ /* Alignment exception */ . 
= 0x600 @@ -445,7 +455,11 @@ EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) /* Decrementer */ +#ifdef CONFIG_IPIPE + EXCEPTION(0x900, Decrementer, __ipipe_grab_timer, EXC_XFER_IPIPE) +#else /* !CONFIG_IPIPE */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) +#endif /* CONFIG_IPIPE */ EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/head_44x.S linux-2.6.21.5/arch/powerpc/kernel/head_44x.S --- linux-2.6.21.5.orig/arch/powerpc/kernel/head_44x.S 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/kernel/head_44x.S 2007-08-07 10:56:22.000000000 +0200 @@ -424,8 +424,11 @@ /* Instruction Storage Interrupt */ INSTRUCTION_STORAGE_EXCEPTION - /* External Input Interrupt */ - EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) +#ifdef CONFIG_IPIPE + EXCEPTION(0x0500, ExternalInput, __ipipe_grab_irq, EXC_XFER_IPIPE) +#else /* !CONFIG_IPIPE */ + EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) +#endif /* CONFIG_IPIPE */ /* Alignment Interrupt */ ALIGNMENT_EXCEPTION diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/head_4xx.S linux-2.6.21.5/arch/powerpc/kernel/head_4xx.S --- linux-2.6.21.5.orig/arch/powerpc/kernel/head_4xx.S 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/kernel/head_4xx.S 2007-08-07 10:56:22.000000000 +0200 @@ -228,6 +228,12 @@ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \ ret_from_except_full) +#ifdef CONFIG_IPIPE +#define EXC_XFER_IPIPE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ + __ipipe_ret_from_except) +#endif /* CONFIG_IPIPE */ + #define EXC_XFER_LITE(n, hdlr) \ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ ret_from_except) @@ -396,7 +402,11 @@ EXC_XFER_EE_LITE(0x400, handle_page_fault) /* 0x0500 - External Interrupt Exception */ +#ifdef CONFIG_IPIPE + EXCEPTION(0x0500, 
HardwareInterrupt, __ipipe_grab_irq, EXC_XFER_IPIPE) +#else /* !CONFIG_IPIPE */ EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) +#endif /* CONFIG_IPIPE */ /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) @@ -434,7 +444,11 @@ lis r0,TSR_PIS@h mtspr SPRN_TSR,r0 /* Clear the PIT exception */ addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_IPIPE + EXC_XFER_IPIPE(0x1000, __ipipe_grab_timer) +#else /* !CONFIG_IPIPE */ EXC_XFER_LITE(0x1000, timer_interrupt) +#endif /* CONFIG_IPIPE */ #if 0 /* NOTE: diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/head_8xx.S linux-2.6.21.5/arch/powerpc/kernel/head_8xx.S --- linux-2.6.21.5.orig/arch/powerpc/kernel/head_8xx.S 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/kernel/head_8xx.S 2007-08-07 10:56:22.000000000 +0200 @@ -187,6 +187,12 @@ EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ ret_from_except_full) +#ifdef CONFIG_IPIPE +#define EXC_XFER_IPIPE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ + __ipipe_ret_from_except) +#endif /* CONFIG_IPIPE */ + #define EXC_XFER_LITE(n, hdlr) \ EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ ret_from_except) @@ -238,7 +244,11 @@ EXC_XFER_EE_LITE(0x400, handle_page_fault) /* External interrupt */ +#ifdef CONFIG_IPIPE + EXCEPTION(0x500, HardwareInterrupt, __ipipe_grab_irq, EXC_XFER_IPIPE) +#else /* !CONFIG_IPIPE */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) +#endif /* CONFIG_IPIPE */ /* Alignment exception */ . 
= 0x600 @@ -259,7 +269,11 @@ EXCEPTION(0x800, FPUnavailable, unknown_exception, EXC_XFER_STD) /* Decrementer */ +#ifdef CONFIG_IPIPE + EXCEPTION(0x900, Decrementer, __ipipe_grab_timer, EXC_XFER_IPIPE) +#else /* !CONFIG_IPIPE */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) +#endif /* CONFIG_IPIPE */ EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/head_booke.h linux-2.6.21.5/arch/powerpc/kernel/head_booke.h --- linux-2.6.21.5.orig/arch/powerpc/kernel/head_booke.h 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/kernel/head_booke.h 2007-08-07 10:56:22.000000000 +0200 @@ -187,6 +187,12 @@ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \ ret_from_except_full) +#ifdef CONFIG_IPIPE +#define EXC_XFER_IPIPE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ + __ipipe_ret_from_except) +#endif /* CONFIG_IPIPE */ + #define EXC_XFER_LITE(n, hdlr) \ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ ret_from_except) @@ -345,6 +351,15 @@ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_STD(0x0700, program_check_exception) +#ifdef CONFIG_IPIPE +#define DECREMENTER_EXCEPTION \ + START_EXCEPTION(Decrementer) \ + NORMAL_EXCEPTION_PROLOG; \ + lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \ + mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_IPIPE(0x0900, __ipipe_grab_timer) +#else /* !CONFIG_IPIPE */ #define DECREMENTER_EXCEPTION \ START_EXCEPTION(Decrementer) \ NORMAL_EXCEPTION_PROLOG; \ @@ -352,6 +367,7 @@ mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_LITE(0x0900, timer_interrupt) +#endif /* CONFIG_IPIPE */ #define FP_UNAVAILABLE_EXCEPTION \ START_EXCEPTION(FloatingPointUnavailable) \ diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/head_fsl_booke.S 
linux-2.6.21.5/arch/powerpc/kernel/head_fsl_booke.S --- linux-2.6.21.5.orig/arch/powerpc/kernel/head_fsl_booke.S 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/kernel/head_fsl_booke.S 2007-08-07 10:56:22.000000000 +0200 @@ -529,7 +529,11 @@ INSTRUCTION_STORAGE_EXCEPTION /* External Input Interrupt */ +#ifdef CONFIG_IPIPE + EXCEPTION(0x0500, ExternalInput, __ipipe_grab_irq, EXC_XFER_IPIPE) +#else /* !CONFIG_IPIPE */ EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) +#endif /* CONFIG_IPIPE */ /* Alignment Interrupt */ ALIGNMENT_EXCEPTION diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/idle.c linux-2.6.21.5/arch/powerpc/kernel/idle.c --- linux-2.6.21.5.orig/arch/powerpc/kernel/idle.c 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/kernel/idle.c 2007-08-07 10:56:22.000000000 +0200 @@ -58,6 +58,7 @@ while (1) { while (!need_resched() && !cpu_should_die()) { ppc64_runlatch_off(); + ipipe_suspend_domain(); if (ppc_md.power_save) { clear_thread_flag(TIF_POLLING_NRFLAG); @@ -66,13 +67,13 @@ * is ordered w.r.t. need_resched() test. 
*/ smp_mb(); - local_irq_disable(); + local_irq_disable_hw(); /* check again after disabling irqs */ if (!need_resched() && !cpu_should_die()) ppc_md.power_save(); - local_irq_enable(); + local_irq_enable_hw(); set_thread_flag(TIF_POLLING_NRFLAG); } else { diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/io.c linux-2.6.21.5/arch/powerpc/kernel/io.c --- linux-2.6.21.5.orig/arch/powerpc/kernel/io.c 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/kernel/io.c 2007-08-07 10:56:22.000000000 +0200 @@ -120,7 +120,7 @@ #define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0) -void _memset_io(volatile void __iomem *addr, int c, unsigned long n) +notrace void _memset_io(volatile void __iomem *addr, int c, unsigned long n) { void *p = (void __force *)addr; u32 lc = c; diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/ipipe.c linux-2.6.21.5/arch/powerpc/kernel/ipipe.c --- linux-2.6.21.5.orig/arch/powerpc/kernel/ipipe.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.21.5/arch/powerpc/kernel/ipipe.c 2007-08-07 10:56:22.000000000 +0200 @@ -0,0 +1,649 @@ +/* -*- linux-c -*- + * linux/arch/powerpc/kernel/ipipe.c + * + * Copyright (C) 2002-2007 Philippe Gerum. + * Copyright (C) 2004 Wolfgang Grandegger (Adeos/ppc port over 2.4). + * Copyright (C) 2005 Heikki Lindholm (PowerPC 970 fixes). + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Architecture-dependent I-PIPE core support for PowerPC. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Current reload value for the decrementer. */ +unsigned long __ipipe_decr_ticks; + +/* Next tick date (timebase value). */ +unsigned long long __ipipe_decr_next[IPIPE_NR_CPUS]; + +struct pt_regs __ipipe_tick_regs[IPIPE_NR_CPUS]; + +static void __ipipe_do_IRQ(unsigned irq, void *cookie); + +static void __ipipe_do_timer(unsigned irq, void *cookie); + +#ifdef CONFIG_SMP + +static cpumask_t __ipipe_cpu_sync_map; + +static cpumask_t __ipipe_cpu_lock_map; + +static ipipe_spinlock_t __ipipe_cpu_barrier = IPIPE_SPIN_LOCK_UNLOCKED; + +static atomic_t __ipipe_critical_count = ATOMIC_INIT(0); + +static void (*__ipipe_cpu_sync) (void); + +void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd) +{ + BUG(); /* SMP not fully implemented yet. */ +} + +/* Always called with hw interrupts off. */ + +void __ipipe_do_critical_sync(unsigned irq) +{ + ipipe_declare_cpuid; + + ipipe_load_cpuid(); + + cpu_set(cpuid, __ipipe_cpu_sync_map); + + /* + * Now we are in sync with the lock requestor running on another + * CPU. Enter a spinning wait until he releases the global + * lock. + */ + spin_lock_hw(&__ipipe_cpu_barrier); + + /* Got it. Now get out. */ + + if (__ipipe_cpu_sync) + /* Call the sync routine if any. */ + __ipipe_cpu_sync(); + + spin_unlock_hw(&__ipipe_cpu_barrier); + + cpu_clear(cpuid, __ipipe_cpu_sync_map); +} + +#endif /* CONFIG_SMP */ + +/* + * ipipe_critical_enter() -- Grab the superlock excluding all CPUs + * but the current one from a critical section. 
This lock is used when + * we must enforce a global critical section for a single CPU in a + * possibly SMP system whichever context the CPUs are running. + */ +unsigned long ipipe_critical_enter(void (*syncfn) (void)) +{ + unsigned long flags; + + local_irq_save_hw(flags); + +#ifdef CONFIG_SMP + if (num_online_cpus() > 1) { /* We might be running a SMP-kernel on a UP box... */ + ipipe_declare_cpuid; + cpumask_t lock_map; + + ipipe_load_cpuid(); + + if (!cpu_test_and_set(cpuid, __ipipe_cpu_lock_map)) { + while (cpu_test_and_set(BITS_PER_LONG - 1, + __ipipe_cpu_lock_map)) { + int n = 0; + do { + cpu_relax(); + } while (++n < cpuid); + } + + spin_lock_hw(&__ipipe_cpu_barrier); + + __ipipe_cpu_sync = syncfn; + + /* Send the sync IPI to all processors but the current one. */ + send_IPI_allbutself(IPIPE_CRITICAL_VECTOR); + + cpus_andnot(lock_map, cpu_online_map, + __ipipe_cpu_lock_map); + + while (!cpus_equal(__ipipe_cpu_sync_map, lock_map)) + cpu_relax(); + } + + atomic_inc(&__ipipe_critical_count); + } +#endif /* CONFIG_SMP */ + + return flags; +} + +/* ipipe_critical_exit() -- Release the superlock. */ + +void ipipe_critical_exit(unsigned long flags) +{ +#ifdef CONFIG_SMP + if (num_online_cpus() > 1) { /* We might be running a SMP-kernel on a UP box... 
*/ + ipipe_declare_cpuid; + + ipipe_load_cpuid(); + + if (atomic_dec_and_test(&__ipipe_critical_count)) { + spin_unlock_hw(&__ipipe_cpu_barrier); + + while (!cpus_empty(__ipipe_cpu_sync_map)) + cpu_relax(); + + cpu_clear(cpuid, __ipipe_cpu_lock_map); + cpu_clear(BITS_PER_LONG - 1, __ipipe_cpu_lock_map); + } + } +#endif /* CONFIG_SMP */ + + local_irq_restore_hw(flags); +} + +void __ipipe_init_platform(void) +{ + unsigned timer_virq; + + /* + * Allocate a virtual IRQ for the decrementer trap early to + * get it mapped to IPIPE_VIRQ_BASE + */ + + timer_virq = ipipe_alloc_virq(); + + if (timer_virq != IPIPE_TIMER_VIRQ) + panic("I-pipe: cannot reserve timer virq #%d (got #%d)", + IPIPE_TIMER_VIRQ, timer_virq); + + __ipipe_decr_ticks = tb_ticks_per_jiffy; +} + +int __ipipe_ack_irq(unsigned irq) +{ + irq_desc_t *desc = irq_desc + irq; + desc->ipipe_ack(irq, desc); + return 1; +} + +void __ipipe_enable_irqdesc(unsigned irq) +{ + irq_desc[irq].status &= ~IRQ_DISABLED; +} + +static void __ipipe_enable_sync(void) +{ + __ipipe_decr_next[ipipe_processor_id()] = + __ipipe_read_timebase() + get_dec(); +} + +/* + * __ipipe_enable_pipeline() -- We are running on the boot CPU, hw + * interrupts are off, and secondary CPUs are still lost in space. + */ +void __ipipe_enable_pipeline(void) +{ + unsigned long flags; + unsigned irq; + + flags = ipipe_critical_enter(&__ipipe_enable_sync); + + /* First, virtualize all interrupts from the root domain. */ + + for (irq = 0; irq < NR_IRQS; irq++) + ipipe_virtualize_irq(ipipe_root_domain, + irq, + &__ipipe_do_IRQ, NULL, + &__ipipe_ack_irq, + IPIPE_HANDLE_MASK | IPIPE_PASS_MASK); + + /* + * We use a virtual IRQ to handle the timer irq (decrementer trap) + * which has been allocated early in __ipipe_init_platform(). 
+ */ + + ipipe_virtualize_irq(ipipe_root_domain, + IPIPE_TIMER_VIRQ, + &__ipipe_do_timer, NULL, + NULL, IPIPE_HANDLE_MASK | IPIPE_PASS_MASK); + + + __ipipe_decr_next[ipipe_processor_id()] = + __ipipe_read_timebase() + get_dec(); + + ipipe_critical_exit(flags); +} + +int ipipe_get_sysinfo(struct ipipe_sysinfo *info) +{ + info->ncpus = num_online_cpus(); + info->cpufreq = ipipe_cpu_freq(); + info->archdep.tmirq = IPIPE_TIMER_VIRQ; + info->archdep.tmfreq = info->cpufreq; + + return 0; +} + +/* + * ipipe_trigger_irq() -- Push the interrupt at front of the pipeline + * just like if it has been actually received from a hw source. Also + * works for virtual interrupts. + */ +int ipipe_trigger_irq(unsigned irq) +{ + unsigned long flags; + + if (irq >= IPIPE_NR_IRQS || + (ipipe_virtual_irq_p(irq) + && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))) + return -EINVAL; + + local_irq_save_hw(flags); + + __ipipe_handle_irq(irq, NULL); + + local_irq_restore_hw(flags); + + return 1; +} + +static void __ipipe_set_decr(void) +{ + ipipe_declare_cpuid; + + ipipe_load_cpuid(); + + disarm_decr[cpuid] = (__ipipe_decr_ticks != tb_ticks_per_jiffy); +#ifdef CONFIG_40x + /* Enable and set auto-reload. */ + mtspr(SPRN_TCR, mfspr(SPRN_TCR) | TCR_ARE); + mtspr(SPRN_PIT, __ipipe_decr_ticks); +#else /* !CONFIG_40x */ + __ipipe_decr_next[cpuid] = __ipipe_read_timebase() + __ipipe_decr_ticks; + set_dec(__ipipe_decr_ticks); +#endif /* CONFIG_40x */ +} + +int ipipe_tune_timer(unsigned long ns, int flags) +{ + unsigned long x, ticks; + + if (flags & IPIPE_RESET_TIMER) + ticks = tb_ticks_per_jiffy; + else { + ticks = (ns / 1000) * tb_ticks_per_jiffy / (1000000 / HZ); + + if (ticks > tb_ticks_per_jiffy) + return -EINVAL; + } + + x = ipipe_critical_enter(&__ipipe_set_decr); /* Sync with all CPUs */ + __ipipe_decr_ticks = ticks; + __ipipe_set_decr(); + ipipe_critical_exit(x); + + return 0; +} + +/* + * __ipipe_handle_irq() -- IPIPE's generic IRQ handler. 
An optimistic + * interrupt protection log is maintained here for each domain. Hw + * interrupts are off on entry. + */ +void __ipipe_handle_irq(int irq, struct pt_regs *regs) +{ + struct ipipe_domain *this_domain, *next_domain; + struct list_head *head, *pos; + ipipe_declare_cpuid; + int m_ack; + + m_ack = (regs == NULL); /* Software-triggered IRQs do not need + * any ack. */ + if (irq >= IPIPE_NR_IRQS) { + printk(KERN_ERR "I-pipe: spurious interrupt %d\n", irq); + return; + } + + ipipe_load_cpuid(); + + this_domain = per_cpu(ipipe_percpu_domain, cpuid); + + if (unlikely(test_bit(IPIPE_STICKY_FLAG, &this_domain->irqs[irq].control))) + head = &this_domain->p_link; + else { + head = __ipipe_pipeline.next; + next_domain = list_entry(head, struct ipipe_domain, p_link); + if (likely(test_bit(IPIPE_WIRED_FLAG, &next_domain->irqs[irq].control))) { + if (!m_ack && next_domain->irqs[irq].acknowledge != NULL) + next_domain->irqs[irq].acknowledge(irq); + if (likely(__ipipe_dispatch_wired(next_domain, irq))) + goto finalize; + return; + } + } + + /* Ack the interrupt. */ + + pos = head; + + while (pos != &__ipipe_pipeline) { + next_domain = list_entry(pos, struct ipipe_domain, p_link); + /* + * For each domain handling the incoming IRQ, mark it as + * pending in its log. + */ + if (test_bit(IPIPE_HANDLE_FLAG, + &next_domain->irqs[irq].control)) { + /* + * Domains that handle this IRQ are polled for + * acknowledging it by decreasing priority order. The + * interrupt must be made pending _first_ in the + * domain's status flags before the PIC is unlocked. + */ + + next_domain->cpudata[cpuid].irq_counters[irq].total_hits++; + next_domain->cpudata[cpuid].irq_counters[irq].pending_hits++; + __ipipe_set_irq_bit(next_domain, cpuid, irq); + + /* + * Always get the first master acknowledge available. + * Once we've got it, allow slave acknowledge + * handlers to run (until one of them stops us). 
+ */ + if (next_domain->irqs[irq].acknowledge != NULL && !m_ack) + m_ack = next_domain->irqs[irq].acknowledge(irq); + } + + /* + * If the domain does not want the IRQ to be passed down the + * interrupt pipe, exit the loop now. + */ + + if (!test_bit(IPIPE_PASS_FLAG, &next_domain->irqs[irq].control)) + break; + + pos = next_domain->p_link.next; + } + +finalize: + /* + * Now walk the pipeline, yielding control to the highest + * priority domain that has pending interrupt(s) or + * immediately to the current domain if the interrupt has been + * marked as 'sticky'. This search does not go beyond the + * current domain in the pipeline. + */ + + __ipipe_walk_pipeline(head, cpuid); +} + +int __ipipe_grab_irq(struct pt_regs *regs) +{ + extern int ppc_spurious_interrupts; + ipipe_declare_cpuid; + int irq; + + irq = ppc_md.get_irq(); + + if (irq != NO_IRQ && irq != NO_IRQ_IGNORE) { +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + ipipe_trace_begin(irq); +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ + __ipipe_handle_irq(irq, regs); +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + ipipe_trace_end(irq); +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ + } else if (irq != NO_IRQ_IGNORE) + ppc_spurious_interrupts++; + + ipipe_load_cpuid(); + + return (per_cpu(ipipe_percpu_domain, cpuid) == ipipe_root_domain && + !test_bit(IPIPE_STALL_FLAG, + &ipipe_root_domain->cpudata[cpuid].status)); +} + +static void __ipipe_do_IRQ(unsigned irq, void *cookie) +{ + struct pt_regs *old_regs; +#ifdef CONFIG_IRQSTACKS + struct thread_info *curtp, *irqtp; +#endif + + /* Provide a valid register frame, even if not the exact one. */ + old_regs = set_irq_regs(__ipipe_tick_regs + smp_processor_id()); + + irq_enter(); + +#ifdef CONFIG_DEBUG_STACKOVERFLOW + /* Debugging check for stack overflow: is there less than 2KB free? 
*/ + { + long sp; + + sp = __get_SP() & (THREAD_SIZE-1); + + if (unlikely(sp < (sizeof(struct thread_info) + 2048))) { + printk("do_IRQ: stack overflow: %ld\n", + sp - sizeof(struct thread_info)); + dump_stack(); + } + } +#endif + +#ifdef CONFIG_IRQSTACKS + /* Switch to the irq stack to handle this */ + curtp = current_thread_info(); + irqtp = hardirq_ctx[smp_processor_id()]; + if (curtp != irqtp) { + struct irq_desc *desc = irq_desc + irq; + void *handler = desc->handle_irq; + if (handler == NULL) + handler = &__do_IRQ; + irqtp->task = curtp->task; + irqtp->flags = 0; + call_handle_irq(irq, desc, irqtp, handler); + irqtp->task = NULL; + if (irqtp->flags) + set_bits(irqtp->flags, &curtp->flags); + } else +#endif + generic_handle_irq(irq); + + irq_exit(); + + set_irq_regs(old_regs); +} + +static void __ipipe_do_timer(unsigned irq, void *cookie) +{ + timer_interrupt(__ipipe_tick_regs + smp_processor_id()); +} + +int __ipipe_grab_timer(struct pt_regs *regs) +{ + ipipe_declare_cpuid; + +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + ipipe_trace_begin(IPIPE_TIMER_VIRQ); +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ + +#ifdef CONFIG_POWER4 + /* On 970 CPUs DEC cannot be disabled, and without setting DEC + * here, DEC interrupt would be triggered as soon as interrupts + * are enabled in __ipipe_sync_stage + */ + set_dec(0x7fffffff); +#endif + + __ipipe_tick_regs[cpuid].msr = regs->msr; /* for timer_interrupt() */ + +#ifndef CONFIG_40x + if (__ipipe_decr_ticks != tb_ticks_per_jiffy) { + unsigned long long next_date, now; + + next_date = __ipipe_decr_next[cpuid]; + + while ((now = __ipipe_read_timebase()) >= next_date) + next_date += __ipipe_decr_ticks; + + set_dec(next_date - now); + + __ipipe_decr_next[cpuid] = next_date; + } +#endif /* !CONFIG_40x */ + + __ipipe_handle_irq(IPIPE_TIMER_VIRQ, NULL); + +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + ipipe_trace_end(IPIPE_TIMER_VIRQ); +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ + + ipipe_load_cpuid(); + + return (per_cpu(ipipe_percpu_domain, cpuid) == 
ipipe_root_domain && + !test_bit(IPIPE_STALL_FLAG, + &ipipe_root_domain->cpudata[cpuid].status)); +} + +int __ipipe_check_root(struct pt_regs *regs) +{ + ipipe_declare_cpuid; + /* + * This routine is called with hw interrupts off, so no migration + * can occur while checking the identity of the current domain. + */ + ipipe_load_cpuid(); + return per_cpu(ipipe_percpu_domain, cpuid) == ipipe_root_domain; +} + +void __ipipe_fast_stall_root(void) +{ + ipipe_declare_cpuid; + unsigned long flags; + + ipipe_get_cpu(flags); /* Care for migration. */ + + set_bit(IPIPE_STALL_FLAG, + &ipipe_root_domain->cpudata[cpuid].status); + + ipipe_put_cpu(flags); +} + +void __ipipe_fast_unstall_root(void) +{ + ipipe_declare_cpuid; + unsigned long flags; + + ipipe_get_cpu(flags); /* Care for migration. */ + + clear_bit(IPIPE_STALL_FLAG, + &ipipe_root_domain->cpudata[cpuid].status); + + ipipe_put_cpu(flags); +} + +int __ipipe_syscall_root(struct pt_regs *regs) +{ + ipipe_declare_cpuid; + unsigned long flags; + + /* + * This routine either returns: + * 0 -- if the syscall is to be passed to Linux; + * >0 -- if the syscall should not be passed to Linux, and no + * tail work should be performed; + * <0 -- if the syscall should not be passed to Linux but the + * tail work has to be performed (for handling signals etc). + */ + + if (__ipipe_syscall_watched_p(current, regs->gpr[0]) && + __ipipe_event_monitored_p(IPIPE_EVENT_SYSCALL) && + __ipipe_dispatch_event(IPIPE_EVENT_SYSCALL,regs) > 0) { + if (ipipe_current_domain == ipipe_root_domain && !in_atomic()) { + /* + * Sync pending VIRQs before _TIF_NEED_RESCHED + * is tested. 
+ */ + ipipe_lock_cpu(flags); + if ((ipipe_root_domain->cpudata[cpuid].irq_pending_hi & IPIPE_IRQMASK_VIRT) != 0) + __ipipe_sync_pipeline(IPIPE_IRQMASK_VIRT); + ipipe_unlock_cpu(flags); + return -1; + } + return 1; + } + + return 0; +} + +int __ipipe_pin_range_mapping(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + /* Actually, we don't need this for this arch (yet?). */ + return 0; +} + +EXPORT_SYMBOL(__ipipe_decr_ticks); +EXPORT_SYMBOL(__ipipe_decr_next); +EXPORT_SYMBOL(ipipe_critical_enter); +EXPORT_SYMBOL(ipipe_critical_exit); +EXPORT_SYMBOL(ipipe_trigger_irq); +EXPORT_SYMBOL(ipipe_get_sysinfo); +EXPORT_SYMBOL(ipipe_tune_timer); + +void atomic_set_mask(unsigned long mask, + unsigned long *ptr); + +void atomic_clear_mask(unsigned long mask, + unsigned long *ptr); + +extern unsigned long context_map[]; + +EXPORT_SYMBOL(disarm_decr); +EXPORT_SYMBOL_GPL(__switch_to); +EXPORT_SYMBOL_GPL(show_stack); +EXPORT_SYMBOL_GPL(atomic_set_mask); +EXPORT_SYMBOL_GPL(atomic_clear_mask); +EXPORT_SYMBOL_GPL(context_map); +EXPORT_SYMBOL_GPL(_switch); +EXPORT_SYMBOL_GPL(last_task_used_math); +#ifdef FEW_CONTEXTS +EXPORT_SYMBOL_GPL(nr_free_contexts); +EXPORT_SYMBOL_GPL(context_mm); +EXPORT_SYMBOL_GPL(steal_context); +#endif + +#ifdef CONFIG_IPIPE_TRACE_MCOUNT +void notrace _mcount(void); +EXPORT_SYMBOL(_mcount); +#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/irq.c linux-2.6.21.5/arch/powerpc/kernel/irq.c --- linux-2.6.21.5.orig/arch/powerpc/kernel/irq.c 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/kernel/irq.c 2007-08-07 10:56:22.000000000 +0200 @@ -70,7 +70,7 @@ #endif int __irq_offset_value; -static int ppc_spurious_interrupts; +int ppc_spurious_interrupts; #ifdef CONFIG_PPC32 EXPORT_SYMBOL(__irq_offset_value); diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/Makefile linux-2.6.21.5/arch/powerpc/kernel/Makefile --- linux-2.6.21.5.orig/arch/powerpc/kernel/Makefile 2007-08-07 10:55:07.000000000 
+0200 +++ linux-2.6.21.5/arch/powerpc/kernel/Makefile 2007-08-07 10:56:22.000000000 +0200 @@ -57,6 +57,8 @@ obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_IPIPE) += ipipe.o +obj-$(CONFIG_IPIPE_TRACE_MCOUNT) += mcount.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/mcount.S linux-2.6.21.5/arch/powerpc/kernel/mcount.S --- linux-2.6.21.5.orig/arch/powerpc/kernel/mcount.S 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.21.5/arch/powerpc/kernel/mcount.S 2007-08-07 10:56:22.000000000 +0200 @@ -0,0 +1,75 @@ +/* + * linux/arch/powerpc/kernel/mcount.S + * + * Adapted from glibc's ppc32 profiling support --rpm. + */ + +#include +#include +#include + +/* + * Excerpt from sysdeps/powerpc/powerpc32/ppc-mcount.S + * + * Do profiling as described in the SYSV ELF ABI, _mcount is called + * with the address of a data word in r0 (that is different for every + * routine, initialised to 0, and otherwise unused). The caller has + * put the address the caller will return to in the usual place on the stack, + * 4(r1). _mcount is responsible for ensuring that when it returns no + * argument-passing registers are disturbed, and that the LR is set back + * to (what the caller sees as) 4(r1). + * + * This is intended so that the following code can be inserted at the + * front of any routine without changing the routine: + * + * mflr r0 + * lis r12,0b@ha + * stw r0,4(r1) + * addi r0,r12,0b@l + * bl _mcount + */ + +.globl _mcount +_mcount: + stwu r1,-48(r1) +/* We need to save the parameter-passing registers. 
*/ + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + mflr r4 + mfcr r5 + stw r4, 44(r1) + stw r5, 8(r1) + tophys(r0,0) + addis r6,r0,ipipe_trace_enable@ha + lwz r3,ipipe_trace_enable@l(r6) + cmpwi r3,0 + beq 1f + li r3,0 + lwz r5, 52(r1) + li r6,0 + bl __ipipe_trace +1: + /* Restore the registers... */ + lwz r6, 8(r1) + lwz r0, 44(r1) + lwz r3, 12(r1) + mtctr r0 + lwz r4, 16(r1) + mtcrf 0xff,r6 + lwz r5, 20(r1) + lwz r6, 24(r1) + lwz r0, 52(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + mtlr r0 + lwz r9, 36(r1) + lwz r10,40(r1) + /* ...unwind the stack frame, and return to your usual programming. */ + addi r1,r1,48 + bctr diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/setup_32.c linux-2.6.21.5/arch/powerpc/kernel/setup_32.c --- linux-2.6.21.5.orig/arch/powerpc/kernel/setup_32.c 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/kernel/setup_32.c 2007-08-07 10:56:22.000000000 +0200 @@ -85,7 +85,7 @@ * from the address that it was linked at, so we must use RELOC/PTRRELOC * to access static data (including strings). -- paulus */ -unsigned long __init early_init(unsigned long dt_ptr) +unsigned long notrace __init early_init(unsigned long dt_ptr) { unsigned long offset = reloc_offset(); struct cpu_spec *spec; diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/time.c linux-2.6.21.5/arch/powerpc/kernel/time.c --- linux-2.6.21.5.orig/arch/powerpc/kernel/time.c 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/kernel/time.c 2007-08-07 10:56:22.000000000 +0200 @@ -74,6 +74,8 @@ #endif #include +unsigned long disarm_decr[NR_CPUS]; + /* keep track of when we need to update the rtc */ time_t last_rtc_update; #ifdef CONFIG_PPC_ISERIES @@ -625,7 +627,9 @@ #endif old_regs = set_irq_regs(regs); +#ifndef CONFIG_IPIPE irq_enter(); +#endif profile_tick(CPU_PROFILING); calculate_steal_time(); @@ -651,7 +655,11 @@ * is the case. 
*/ if (!cpu_is_offline(cpu)) +#ifdef CONFIG_IPIPE + account_process_time(__ipipe_tick_regs + cpu); +#else account_process_time(regs); +#endif /* * No need to check whether cpu is offline here; boot_cpuid @@ -672,7 +680,8 @@ } next_dec = tb_ticks_per_jiffy - ticks; - set_dec(next_dec); + if ( !disarm_decr[cpu] ) + set_dec(next_dec); #ifdef CONFIG_PPC_ISERIES if (firmware_has_feature(FW_FEATURE_ISERIES) && hvlpevent_is_pending()) @@ -687,7 +696,9 @@ } #endif +#ifndef CONFIG_IPIPE irq_exit(); +#endif set_irq_regs(old_regs); } diff -Naur linux-2.6.21.5.orig/arch/powerpc/kernel/traps.c linux-2.6.21.5/arch/powerpc/kernel/traps.c --- linux-2.6.21.5.orig/arch/powerpc/kernel/traps.c 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/kernel/traps.c 2007-08-07 10:56:22.000000000 +0200 @@ -319,6 +319,9 @@ int recover = 0; unsigned long reason = get_mc_reason(regs); + if (ipipe_trap_notify(IPIPE_TRAP_MCE,regs)) + return; + /* See if any machine dependent calls */ if (ppc_md.machine_check_exception) recover = ppc_md.machine_check_exception(regs); @@ -488,11 +491,17 @@ printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", regs->nip, regs->msr, regs->trap); + if (ipipe_trap_notify(IPIPE_TRAP_UNKNOWN,regs)) + return; + _exception(SIGTRAP, regs, 0, 0); } void instruction_breakpoint_exception(struct pt_regs *regs) { + if (ipipe_trap_notify(IPIPE_TRAP_IABR,regs)) + return; + if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) return; @@ -503,6 +512,8 @@ void RunModeException(struct pt_regs *regs) { + if (ipipe_trap_notify(IPIPE_TRAP_RM,regs)) + return; _exception(SIGTRAP, regs, 0, 0); } @@ -510,6 +521,9 @@ { regs->msr &= ~(MSR_SE | MSR_BE); /* Turn off 'trace' bits */ + if (ipipe_trap_notify(IPIPE_TRAP_SSTEP,regs)) + return; + if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) return; @@ -529,6 +543,8 @@ { if (single_stepping(regs)) { clear_single_step(regs); + if (ipipe_trap_notify(IPIPE_TRAP_SSTEP,regs)) + 
return; _exception(SIGTRAP, regs, TRAP_TRACE, 0); } } @@ -745,6 +761,9 @@ /* We can now get here via a FP Unavailable exception if the core * has no FPU, in that case the reason flags will be 0 */ + if (ipipe_trap_notify(IPIPE_TRAP_PCE,regs)) + return; + if (reason & REASON_FP) { /* IEEE FP exception */ parse_fpe(regs); @@ -817,6 +836,9 @@ { int sig, code, fixed = 0; + if (ipipe_trap_notify(IPIPE_TRAP_ALIGNMENT,regs)) + return; + /* we don't implement logging of alignment exceptions */ if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS)) fixed = fix_alignment(regs); @@ -854,6 +876,8 @@ { printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n", regs->nip, regs->msr); + if (ipipe_trap_notify(IPIPE_TRAP_NREC,regs)) + return; debugger(regs); die("nonrecoverable exception", regs, SIGKILL); } @@ -869,11 +893,16 @@ { printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " "%lx at %lx\n", regs->trap, regs->nip); + if (ipipe_trap_notify(IPIPE_TRAP_FPUNAVAIL,regs)) + return; die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); } void altivec_unavailable_exception(struct pt_regs *regs) { + if (ipipe_trap_notify(IPIPE_TRAP_ALTUNAVAIL,regs)) + return; + if (user_mode(regs)) { /* A user program has executed an altivec instruction, but this kernel doesn't support altivec. 
*/ @@ -898,6 +927,9 @@ extern int Soft_emulate_8xx(struct pt_regs *); int errcode; + if (ipipe_trap_notify(IPIPE_TRAP_SOFTEMU,regs)) + return; + CHECK_FULL_REGS(regs); if (!user_mode(regs)) { @@ -947,6 +979,9 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status) { + if (ipipe_trap_notify(IPIPE_TRAP_DEBUG,regs)) + return; + if (debug_status & DBSR_IC) { /* instruction completion */ regs->msr &= ~MSR_DE; if (user_mode(regs)) { @@ -977,6 +1012,9 @@ { int err; + if (ipipe_trap_notify(IPIPE_TRAP_ALTASSIST,regs)) + return; + if (!user_mode(regs)) { printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" " at %lx\n", regs->nip); @@ -1014,8 +1052,11 @@ * as priv ops, in the future we could try to do * something smarter */ - if (error_code & (ESR_DLK|ESR_ILK)) + if (error_code & (ESR_DLK|ESR_ILK)) { + if (ipipe_trap_notify(IPIPE_TRAP_CACHE,regs)) + return; _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); + } return; } #endif /* CONFIG_FSL_BOOKE */ @@ -1027,6 +1068,9 @@ int fpexc_mode; int code = 0; + if (ipipe_trap_notify(IPIPE_TRAP_SPE,regs)) + return; + spefscr = current->thread.spefscr; fpexc_mode = current->thread.fpexc_mode; @@ -1066,6 +1110,8 @@ { printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n", regs->trap, regs->nip); + if (ipipe_trap_notify(IPIPE_TRAP_NREC,regs)) + return; die("Unrecoverable exception", regs, SIGABRT); } diff -Naur linux-2.6.21.5.orig/arch/powerpc/mm/fault.c linux-2.6.21.5/arch/powerpc/mm/fault.c --- linux-2.6.21.5.orig/arch/powerpc/mm/fault.c 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/mm/fault.c 2007-08-07 10:56:22.000000000 +0200 @@ -160,6 +160,9 @@ int trap = TRAP(regs); int is_exec = trap == 0x400; + if (ipipe_trap_notify(IPIPE_TRAP_ACCESS,regs)) + return 0; + #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) /* * Fortunately the bit assignments in SRR1 for an instruction diff -Naur linux-2.6.21.5.orig/arch/powerpc/platforms/iseries/irq.c 
linux-2.6.21.5/arch/powerpc/platforms/iseries/irq.c --- linux-2.6.21.5.orig/arch/powerpc/platforms/iseries/irq.c 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/platforms/iseries/irq.c 2007-08-07 10:56:22.000000000 +0200 @@ -81,7 +81,7 @@ } data; }; -static DEFINE_SPINLOCK(pending_irqs_lock); +static IPIPE_DEFINE_SPINLOCK(pending_irqs_lock); static int num_pending_irqs; static int pending_irqs[NR_IRQS]; diff -Naur linux-2.6.21.5.orig/arch/powerpc/platforms/powermac/pic.c linux-2.6.21.5/arch/powerpc/platforms/powermac/pic.c --- linux-2.6.21.5.orig/arch/powerpc/platforms/powermac/pic.c 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/platforms/powermac/pic.c 2007-08-07 10:56:22.000000000 +0200 @@ -63,7 +63,7 @@ static int max_real_irqs; static u32 level_mask[4]; -static DEFINE_SPINLOCK(pmac_pic_lock); +static IPIPE_DEFINE_SPINLOCK(pmac_pic_lock); #define NR_MASK_WORDS ((NR_IRQS + 31) / 32) static unsigned long ppc_lost_interrupts[NR_MASK_WORDS]; diff -Naur linux-2.6.21.5.orig/arch/powerpc/platforms/ps3/interrupt.c linux-2.6.21.5/arch/powerpc/platforms/ps3/interrupt.c --- linux-2.6.21.5.orig/arch/powerpc/platforms/ps3/interrupt.c 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/platforms/ps3/interrupt.c 2007-08-07 10:56:22.000000000 +0200 @@ -71,7 +71,7 @@ u64 unused_2[3]; }; u64 ipi_debug_brk_mask; - spinlock_t lock; + ipipe_spinlock_t lock; }; /** diff -Naur linux-2.6.21.5.orig/arch/powerpc/sysdev/i8259.c linux-2.6.21.5/arch/powerpc/sysdev/i8259.c --- linux-2.6.21.5.orig/arch/powerpc/sysdev/i8259.c 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/sysdev/i8259.c 2007-08-07 10:56:22.000000000 +0200 @@ -23,7 +23,7 @@ #define cached_A1 (cached_8259[0]) #define cached_21 (cached_8259[1]) -static DEFINE_SPINLOCK(i8259_lock); +static IPIPE_DEFINE_SPINLOCK(i8259_lock); static struct device_node *i8259_node; static struct irq_host *i8259_host; diff -Naur 
linux-2.6.21.5.orig/arch/powerpc/sysdev/ipic.c linux-2.6.21.5/arch/powerpc/sysdev/ipic.c --- linux-2.6.21.5.orig/arch/powerpc/sysdev/ipic.c 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/sysdev/ipic.c 2007-08-07 10:56:22.000000000 +0200 @@ -30,7 +30,7 @@ #include "ipic.h" static struct ipic * primary_ipic; -static DEFINE_SPINLOCK(ipic_lock); +static IPIPE_DEFINE_SPINLOCK(ipic_lock); static struct ipic_info ipic_info[] = { [9] = { diff -Naur linux-2.6.21.5.orig/arch/powerpc/sysdev/mpic.c linux-2.6.21.5/arch/powerpc/sysdev/mpic.c --- linux-2.6.21.5.orig/arch/powerpc/sysdev/mpic.c 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/sysdev/mpic.c 2007-08-07 11:00:27.000000000 +0200 @@ -44,7 +44,7 @@ static struct mpic *mpics; static struct mpic *mpic_primary; -static DEFINE_SPINLOCK(mpic_lock); +static IPIPE_DEFINE_SPINLOCK(mpic_lock); #ifdef CONFIG_PPC32 /* XXX for now */ #ifdef CONFIG_IRQ_ALL_CPUS @@ -564,19 +564,24 @@ unsigned int loops = 100000; struct mpic *mpic = mpic_from_irq(irq); unsigned int src = mpic_irq_to_hw(irq); + unsigned long flags; DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, irq, src); + local_irq_save_hw_cond(flags); + mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)) & ~MPIC_VECPRI_MASK); /* make sure mask gets to controller before we return to user */ do { if (!loops--) { - printk(KERN_ERR "mpic_enable_irq timeout\n"); + printk(KERN_ERR "mpic_unmask_irq timeout (irq %u)\n", irq); break; } } while(mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)) & MPIC_VECPRI_MASK); + + local_irq_restore_hw_cond(flags); } static void mpic_mask_irq(unsigned int irq) @@ -584,9 +589,12 @@ unsigned int loops = 100000; struct mpic *mpic = mpic_from_irq(irq); unsigned int src = mpic_irq_to_hw(irq); + unsigned long flags; DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src); + local_irq_save_hw_cond(flags); + mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), mpic_irq_read(src, 
MPIC_INFO(IRQ_VECTOR_PRI)) | MPIC_VECPRI_MASK); @@ -594,10 +602,12 @@ /* make sure mask gets to controller before we return to user */ do { if (!loops--) { - printk(KERN_ERR "mpic_enable_irq timeout\n"); + printk(KERN_ERR "mpic_mask_irq timeout (irq=%u)\n", irq); break; } } while(!(mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)) & MPIC_VECPRI_MASK)); + + local_irq_restore_hw_cond(flags); } static void mpic_end_irq(unsigned int irq) @@ -739,6 +749,7 @@ unsigned int src = mpic_irq_to_hw(virq); struct irq_desc *desc = get_irq_desc(virq); unsigned int vecpri, vold, vnew; + unsigned long flags; DBG("mpic: set_irq_type(mpic:@%p,virq:%d,src:0x%x,type:0x%x)\n", mpic, virq, src, flow_type); @@ -763,6 +774,8 @@ else vecpri = mpic_type_to_vecpri(mpic, flow_type); + local_irq_save_hw_cond(flags); + vold = mpic_irq_read(src, MPIC_INFO(IRQ_VECTOR_PRI)); vnew = vold & ~(MPIC_INFO(VECPRI_POLARITY_MASK) | MPIC_INFO(VECPRI_SENSE_MASK)); @@ -770,6 +783,8 @@ if (vold != vnew) mpic_irq_write(src, MPIC_INFO(IRQ_VECTOR_PRI), vnew); + local_irq_restore_hw_cond(flags); + return 0; } diff -Naur linux-2.6.21.5.orig/arch/powerpc/sysdev/tsi108_pci.c linux-2.6.21.5/arch/powerpc/sysdev/tsi108_pci.c --- linux-2.6.21.5.orig/arch/powerpc/sysdev/tsi108_pci.c 2007-08-07 10:55:07.000000000 +0200 +++ linux-2.6.21.5/arch/powerpc/sysdev/tsi108_pci.c 2007-08-07 10:56:22.000000000 +0200 @@ -249,7 +249,9 @@ { u_int irp_cfg; int int_line = (irq - IRQ_PCI_INTAD_BASE); + unsigned long flags; + local_irq_save_hw_cond(flags); irp_cfg = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL); mb(); irp_cfg |= (1 << int_line); /* INTx_DIR = output */ @@ -257,19 +259,23 @@ tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL, irp_cfg); mb(); irp_cfg = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL); + local_irq_restore_hw_cond(flags); } static void tsi108_pci_int_unmask(u_int irq) { u_int irp_cfg; int int_line = (irq - IRQ_PCI_INTAD_BASE); + unsigned long flags; + local_irq_save_hw_cond(flags); 
irp_cfg = tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL); mb(); irp_cfg &= ~(1 << int_line); irp_cfg |= (3 << (8 + (int_line * 2))); tsi108_write_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_CFG_CTL, irp_cfg); mb(); + local_irq_restore_hw_cond(flags); } static void init_pci_source(void) @@ -360,6 +366,9 @@ static void tsi108_pci_irq_end(u_int irq) { + unsigned long flags; + + local_irq_save_hw_cond(flags); tsi108_pci_int_unmask(irq); /* Enable interrupts from PCI block */ @@ -367,6 +376,7 @@ tsi108_read_reg(TSI108_PCI_OFFSET + TSI108_PCI_IRP_ENABLE) | TSI108_PCI_IRP_ENABLE_P_INT); + local_irq_restore_hw_cond(flags); mb(); } diff -Naur linux-2.6.21.5.orig/drivers/pci/htirq.c linux-2.6.21.5/drivers/pci/htirq.c --- linux-2.6.21.5.orig/drivers/pci/htirq.c 2007-08-07 10:55:19.000000000 +0200 +++ linux-2.6.21.5/drivers/pci/htirq.c 2007-08-07 10:56:22.000000000 +0200 @@ -21,7 +21,7 @@ * With multiple simultaneous hypertransport irq devices it might pay * to make this more fine grained. But start with simple, stupid, and correct. */ -static DEFINE_SPINLOCK(ht_irq_lock); +static IPIPE_DEFINE_SPINLOCK(ht_irq_lock); struct ht_irq_cfg { struct pci_dev *dev; diff -Naur linux-2.6.21.5.orig/drivers/serial/mpc52xx_uart.c linux-2.6.21.5/drivers/serial/mpc52xx_uart.c --- linux-2.6.21.5.orig/drivers/serial/mpc52xx_uart.c 2007-08-07 10:55:23.000000000 +0200 +++ linux-2.6.21.5/drivers/serial/mpc52xx_uart.c 2007-08-07 10:56:22.000000000 +0200 @@ -803,6 +803,40 @@ #endif +#ifdef CONFIG_IPIPE + +#include + +void __ipipe_serial_debug(const char *fmt, ...) +{ + struct console *co = MPC52xx_PSC_CONSOLE; + unsigned long flags, count; + struct uart_port *port; + char buf[128]; + va_list ap; + + if (co->index < 0) + return; /* No console. */ + + port = &mpc52xx_uart_ports[co->index]; + + if (!port->mapbase) + return; /* Too early. 
*/ + va_start(ap, fmt); + vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + count = strlen(buf); + + local_irq_save_hw(flags); + mpc52xx_console_write(co, buf, count); + local_irq_restore_hw(flags); +} + +EXPORT_SYMBOL(__ipipe_serial_debug); + +#endif + /* ======================================================================== */ /* UART Driver */ /* ======================================================================== */ diff -Naur linux-2.6.21.5.orig/include/asm-powerpc/hw_irq.h linux-2.6.21.5/include/asm-powerpc/hw_irq.h --- linux-2.6.21.5.orig/include/asm-powerpc/hw_irq.h 2007-08-07 10:55:35.000000000 +0200 +++ linux-2.6.21.5/include/asm-powerpc/hw_irq.h 2007-08-07 11:08:09.000000000 +0200 @@ -16,6 +16,10 @@ #ifdef CONFIG_PPC64 #include +#ifdef CONFIG_IPIPE +#error "I-pipe: arch/powerpc64 not supported yet - use an older Adeos patch." +#endif + static inline unsigned long local_get_flags(void) { unsigned long flags; @@ -51,17 +55,36 @@ #define hard_irq_enable() __mtmsrd(mfmsr() | MSR_EE, 1) #define hard_irq_disable() __mtmsrd(mfmsr() & ~MSR_EE, 1) -#else +#else /* !CONFIG_PPC64 */ + +static inline unsigned long raw_mangle_irq_bits(int virt, unsigned long real) +{ + /* Merge virtual and real interrupt mask bits into a single + 32bit word. 
*/ + return (real & ~(1UL << 31)) | ((unsigned long)(virt != 0) << 31); +} +static inline int raw_demangle_irq_bits(unsigned long *x) +{ + int virt = (*x & (1UL << 31)) != 0; + *x &= ~(1UL << 31); + return virt; +} + +#define local_save_flags_hw(x) ((x) = mfmsr()) +#define local_test_iflag_hw(x) ((x) & MSR_EE) +#define irqs_disabled_hw() ((mfmsr() & MSR_EE) == 0) +#define local_irq_save_hw_notrace(x) local_irq_save_ptr_hw(&(x)) +#define raw_irqs_disabled_flags(x) (!local_test_iflag_hw(x)) #if defined(CONFIG_BOOKE) -#define SET_MSR_EE(x) mtmsr(x) -#define local_irq_restore(flags) __asm__ __volatile__("wrtee %0" : : "r" (flags) : "memory") +#define local_irq_restore_hw_notrace(x) \ + __asm__ __volatile__("wrtee %0" : : "r" (x) : "memory") #else #define SET_MSR_EE(x) mtmsr(x) -#define local_irq_restore(flags) mtmsr(flags) +#define local_irq_restore_hw_notrace(x) mtmsr(x) #endif -static inline void local_irq_disable(void) +static inline void local_irq_disable_hw_notrace(void) { #ifdef CONFIG_BOOKE __asm__ __volatile__("wrteei 0": : :"memory"); @@ -73,7 +96,7 @@ #endif } -static inline void local_irq_enable(void) +static inline void local_irq_enable_hw_notrace(void) { #ifdef CONFIG_BOOKE __asm__ __volatile__("wrteei 1": : :"memory"); @@ -85,11 +108,11 @@ #endif } -static inline void local_irq_save_ptr(unsigned long *flags) +static inline void local_irq_save_ptr_hw(unsigned long *x) { unsigned long msr; msr = mfmsr(); - *flags = msr; + *x = msr; #ifdef CONFIG_BOOKE __asm__ __volatile__("wrteei 0": : :"memory"); #else @@ -98,9 +121,108 @@ __asm__ __volatile__("": : :"memory"); } -#define local_save_flags(flags) ((flags) = mfmsr()) -#define local_irq_save(flags) local_irq_save_ptr(&flags) -#define irqs_disabled() ((mfmsr() & MSR_EE) == 0) +#ifdef CONFIG_IPIPE + +#include + +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + +#include + +static inline void local_irq_disable_hw(void) +{ + if (!irqs_disabled_hw()) { + local_irq_disable_hw_notrace(); + ipipe_trace_begin(0x80000000); + } +} + +static inline void 
local_irq_enable_hw(void) +{ + if (irqs_disabled_hw()) { + ipipe_trace_end(0x80000000); + local_irq_enable_hw_notrace(); + } +} + +#define local_irq_save_hw(x) \ +do { \ + local_irq_save_ptr_hw(&(x)); \ + if (local_test_iflag_hw(x)) \ + ipipe_trace_begin(0x80000001); \ +} while(0) + +static inline void local_irq_restore_hw(unsigned long x) +{ + if (local_test_iflag_hw(x)) + ipipe_trace_end(0x80000001); + + local_irq_restore_hw_notrace(x); +} + +#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */ + +#define local_irq_disable_hw local_irq_disable_hw_notrace +#define local_irq_enable_hw local_irq_enable_hw_notrace +#define local_irq_save_hw local_irq_save_hw_notrace +#define local_irq_restore_hw local_irq_restore_hw_notrace + +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ + +static inline void local_irq_disable(void) +{ + ipipe_check_context(ipipe_root_domain); + __ipipe_stall_root(); + barrier(); +} + +static inline void local_irq_enable(void) +{ + barrier(); + __ipipe_unstall_root(); +} + +static inline void local_irq_save_ptr(unsigned long *x) +{ + *x = (!__ipipe_test_and_stall_root()) << MSR_EE_LG; + barrier(); +} + +static inline void local_irq_restore(unsigned long x) +{ + barrier(); + __ipipe_restore_root(!(x & MSR_EE)); +} + +#define local_save_flags(x) \ +do { \ + (x) = (!__ipipe_test_root()) << MSR_EE_LG; \ + barrier(); \ +} while(0) + +#define local_irq_save(x) \ +do { \ + ipipe_check_context(ipipe_root_domain); \ + local_irq_save_ptr(&(x)); \ +} while(0) + +#define irqs_disabled() __ipipe_test_root() + +#else /* !CONFIG_IPIPE */ + +#define local_irq_disable_hw local_irq_disable_hw_notrace +#define local_irq_enable_hw local_irq_enable_hw_notrace +#define local_irq_save_hw local_irq_save_hw_notrace +#define local_irq_restore_hw local_irq_restore_hw_notrace +#define local_irq_restore(x) local_irq_restore_hw(x) +#define local_irq_disable() local_irq_disable_hw() +#define local_irq_enable() local_irq_enable_hw() +#define local_irq_save_ptr(x) local_irq_save_ptr_hw(x) +#define 
irqs_disabled() irqs_disabled_hw() +#define local_save_flags(x) local_save_flags_hw(x) +#define local_irq_save(x) local_irq_save_hw(x) + +#endif /* !CONFIG_IPIPE */ #define hard_irq_enable() local_irq_enable() #define hard_irq_disable() local_irq_disable() diff -Naur linux-2.6.21.5.orig/include/asm-powerpc/ipipe.h linux-2.6.21.5/include/asm-powerpc/ipipe.h --- linux-2.6.21.5.orig/include/asm-powerpc/ipipe.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.21.5/include/asm-powerpc/ipipe.h 2007-08-07 11:08:20.000000000 +0200 @@ -0,0 +1,219 @@ +/* -*- linux-c -*- + * include/asm-ppc/ipipe.h + * + * Copyright (C) 2002-2007 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef __POWERPC_IPIPE_H +#define __POWERPC_IPIPE_H + +#ifdef CONFIG_IPIPE + +#include +#include +#include +#include +#include +#include +#include + +#define IPIPE_ARCH_STRING "1.6-04" +#define IPIPE_MAJOR_NUMBER 1 +#define IPIPE_MINOR_NUMBER 6 +#define IPIPE_PATCH_NUMBER 4 + +#define IPIPE_NR_XIRQS NR_IRQS +#define IPIPE_IRQ_ISHIFT 5 /* 2^5 for 32bits arch. */ + +/* + * The first virtual interrupt is reserved for the timer (see + * __ipipe_init_platform). 
+ */ +#define IPIPE_TIMER_VIRQ IPIPE_VIRQ_BASE + +#ifdef CONFIG_SMP +#error "I-pipe/powerpc: SMP not yet implemented" +#define ipipe_processor_id() (current_thread_info()->cpu) +#else /* !CONFIG_SMP */ +#define ipipe_processor_id() 0 +#endif /* CONFIG_SMP */ + +#define prepare_arch_switch(next) \ +do { \ + ipipe_schedule_notify(current, next); \ + local_irq_disable_hw(); \ +} while(0) + +#define task_hijacked(p) \ + ({ \ + int x = ipipe_current_domain != ipipe_root_domain; \ + __clear_bit(IPIPE_SYNC_FLAG, \ + &ipipe_root_domain->cpudata[task_cpu(p)].status); \ + local_irq_enable_hw(); x; \ + }) + + /* PPC traps */ +#define IPIPE_TRAP_ACCESS 0 /* Data or instruction access exception */ +#define IPIPE_TRAP_ALIGNMENT 1 /* Alignment exception */ +#define IPIPE_TRAP_ALTUNAVAIL 2 /* Altivec unavailable */ +#define IPIPE_TRAP_PCE 3 /* Program check exception */ +#define IPIPE_TRAP_MCE 4 /* Machine check exception */ +#define IPIPE_TRAP_UNKNOWN 5 /* Unknown exception */ +#define IPIPE_TRAP_IABR 6 /* Instruction breakpoint */ +#define IPIPE_TRAP_RM 7 /* Run mode exception */ +#define IPIPE_TRAP_SSTEP 8 /* Single-step exception */ +#define IPIPE_TRAP_NREC 9 /* Non-recoverable exception */ +#define IPIPE_TRAP_SOFTEMU 10 /* Software emulation */ +#define IPIPE_TRAP_DEBUG 11 /* Debug exception */ +#define IPIPE_TRAP_SPE 12 /* SPE exception */ +#define IPIPE_TRAP_ALTASSIST 13 /* Altivec assist exception */ +#define IPIPE_TRAP_CACHE 14 /* Cache-locking exception (FSL) */ +#define IPIPE_TRAP_FPUNAVAIL 15 /* FP unavailable exception */ +#define IPIPE_NR_FAULTS 16 +/* Pseudo-vectors used for kernel events */ +#define IPIPE_FIRST_EVENT IPIPE_NR_FAULTS +#define IPIPE_EVENT_SYSCALL (IPIPE_FIRST_EVENT) +#define IPIPE_EVENT_SCHEDULE (IPIPE_FIRST_EVENT + 1) +#define IPIPE_EVENT_SIGWAKE (IPIPE_FIRST_EVENT + 2) +#define IPIPE_EVENT_SETSCHED (IPIPE_FIRST_EVENT + 3) +#define IPIPE_EVENT_INIT (IPIPE_FIRST_EVENT + 4) +#define IPIPE_EVENT_EXIT (IPIPE_FIRST_EVENT + 5) +#define IPIPE_EVENT_CLEANUP 
(IPIPE_FIRST_EVENT + 6) +#define IPIPE_LAST_EVENT IPIPE_EVENT_CLEANUP +#define IPIPE_NR_EVENTS (IPIPE_LAST_EVENT + 1) + +struct ipipe_domain; + +struct ipipe_sysinfo { + + int ncpus; /* Number of CPUs on board */ + u64 cpufreq; /* CPU frequency (in Hz) */ + + /* Arch-dependent block */ + + struct { + unsigned tmirq; /* Timer tick IRQ */ + u64 tmfreq; /* Timer frequency */ + } archdep; +}; + +#define ipipe_read_tsc(t) \ + ({ \ + unsigned long __tbu; \ + __asm__ __volatile__ ("1: mftbu %0\n" \ + "mftb %1\n" \ + "mftbu %2\n" \ + "cmpw %2,%0\n" \ + "bne- 1b\n" \ + :"=r" (((unsigned long *)&t)[0]), \ + "=r" (((unsigned long *)&t)[1]), \ + "=r" (__tbu)); \ + t; \ + }) + +#define __ipipe_read_timebase() \ + ({ \ + unsigned long long t; \ + ipipe_read_tsc(t); \ + t; \ + }) + +#define ipipe_cpu_freq() (HZ * tb_ticks_per_jiffy) +#define ipipe_tsc2ns(t) ((((unsigned long)(t)) * 1000) / (ipipe_cpu_freq() / 1000000)) + +#define ipipe_tsc2us(t) \ +({ \ + unsigned long long delta = (t); \ + do_div(delta, ipipe_cpu_freq()/1000000+1); \ + (unsigned long)delta; \ +}) + +/* Private interface -- Internal use only */ + +#define __ipipe_check_platform() do { } while(0) + +#define __ipipe_enable_irq(irq) enable_irq(irq) + +#define __ipipe_disable_irq(irq) disable_irq(irq) + +#ifdef CONFIG_SMP +void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd); +#else +#define __ipipe_hook_critical_ipi(ipd) do { } while(0) +#endif + +void __ipipe_enable_irqdesc(unsigned irq); + +void __ipipe_init_platform(void); + +void __ipipe_enable_pipeline(void); + +extern unsigned long __ipipe_decr_ticks; + +extern unsigned long long __ipipe_decr_next[]; + +extern struct pt_regs __ipipe_tick_regs[]; + +extern unsigned long disarm_decr[]; + +void __ipipe_handle_irq(int irq, + struct pt_regs *regs); + +void __ipipe_serial_debug(const char *fmt, ...); + +#define __ipipe_tick_irq IPIPE_TIMER_VIRQ + +static inline unsigned long __ipipe_ffnz(unsigned long ul) +{ + __asm__ __volatile__("cntlzw %0, %1":"=r"(ul):"r"(ul 
& (-ul))); + return 31 - ul; +} + +/* When running handlers, enable hw interrupts for all domains but the + * one heading the pipeline, so that IRQs can never be significantly + * deferred for the latter. */ +#define __ipipe_run_isr(ipd, irq, cpuid) \ +do { \ + local_irq_enable_nohead(ipd); \ + if (ipd == ipipe_root_domain) \ + if (likely(!ipipe_virtual_irq_p(irq))) \ + ipd->irqs[irq].handler(irq, NULL); \ + else { \ + irq_enter(); \ + ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie);\ + irq_exit(); \ + } \ + else { \ + __clear_bit(IPIPE_SYNC_FLAG, &cpudata->status); \ + ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); \ + __set_bit(IPIPE_SYNC_FLAG, &cpudata->status); \ + } \ + local_irq_disable_nohead(ipd); \ +} while(0) + +#define __ipipe_syscall_watched_p(p, sc) \ + (((p)->flags & PF_EVNOTIFY) || (unsigned long)sc >= NR_syscalls) + +#else /* !CONFIG_IPIPE */ + +#define task_hijacked(p) 0 + +#endif /* CONFIG_IPIPE */ + +#endif /* !__POWERPC_IPIPE_H */ + diff -Naur linux-2.6.21.5.orig/include/asm-ppc/mmu_context.h linux-2.6.21.5/include/asm-ppc/mmu_context.h --- linux-2.6.21.5.orig/include/asm-ppc/mmu_context.h 2007-08-07 10:55:35.000000000 +0200 +++ linux-2.6.21.5/include/asm-ppc/mmu_context.h 2007-08-07 10:56:22.000000000 +0200 @@ -153,7 +153,10 @@ */ static inline void destroy_context(struct mm_struct *mm) { + unsigned long flags; + preempt_disable(); + local_irq_save_hw_cond(flags); if (mm->context.id != NO_CONTEXT) { clear_bit(mm->context.id, context_map); mm->context.id = NO_CONTEXT; @@ -161,6 +164,7 @@ atomic_inc(&nr_free_contexts); #endif } + local_irq_restore_hw_cond(flags); preempt_enable(); } @@ -193,7 +197,13 @@ * After we have set current->mm to a new value, this activates * the context for the new mm so we see the new mappings. 
*/ -#define activate_mm(active_mm, mm) switch_mm(active_mm, mm, current) +#define activate_mm(active_mm, mm) \ +do { \ + unsigned long flags; \ + local_irq_save_hw_cond(flags); \ + switch_mm(active_mm, mm, current); \ + local_irq_restore_hw_cond(flags); \ +} while(0) extern void mmu_context_init(void); diff -Naur linux-2.6.21.5.orig/include/linux/hardirq.h linux-2.6.21.5/include/linux/hardirq.h --- linux-2.6.21.5.orig/include/linux/hardirq.h 2007-08-07 10:55:37.000000000 +0200 +++ linux-2.6.21.5/include/linux/hardirq.h 2007-08-07 10:56:22.000000000 +0200 @@ -133,7 +133,7 @@ */ extern void irq_exit(void); -#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) -#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) +#define nmi_enter() do { if (ipipe_root_domain_p) { lockdep_off(); __irq_enter(); } } while (0) +#define nmi_exit() do { if (ipipe_root_domain_p) { __irq_exit(); lockdep_on(); } } while (0) #endif /* LINUX_HARDIRQ_H */ diff -Naur linux-2.6.21.5.orig/include/linux/ipipe_base.h linux-2.6.21.5/include/linux/ipipe_base.h --- linux-2.6.21.5.orig/include/linux/ipipe_base.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.21.5/include/linux/ipipe_base.h 2007-08-07 10:56:22.000000000 +0200 @@ -0,0 +1,88 @@ +/* -*- linux-c -*- + * include/linux/ipipe_base.h + * + * Copyright (C) 2002-2007 Philippe Gerum. + * 2007 Jan Kiszka. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef __LINUX_IPIPE_BASE_H +#define __LINUX_IPIPE_BASE_H + +#ifdef CONFIG_IPIPE + +#include + +/* Per-cpu pipeline status */ +#define IPIPE_STALL_FLAG 0 /* Stalls a pipeline stage -- guaranteed at bit #0 */ +#define IPIPE_SYNC_FLAG 1 /* The interrupt syncer is running for the domain */ +#define IPIPE_NOSTACK_FLAG 2 /* Domain currently runs on a foreign stack */ + +#define IPIPE_SYNC_MASK (1 << IPIPE_SYNC_FLAG) + +extern struct ipipe_domain ipipe_root; + +#define ipipe_root_domain (&ipipe_root) + +#ifdef CONFIG_SMP + +void __ipipe_stall_root(void); + +unsigned long __ipipe_test_root(void); + +unsigned long __ipipe_test_and_stall_root(void); + +#else /* !CONFIG_SMP */ + +/* + * Note: This cast relies on cpudata[0].status being the first element in the + * root domain structure (for UP only). 
+ */ +#define __ipipe_root_status ((unsigned long *)&ipipe_root) + +static inline void __ipipe_stall_root(void) +{ + set_bit(IPIPE_STALL_FLAG, __ipipe_root_status); +} + +static inline unsigned long __ipipe_test_root(void) +{ + return test_bit(IPIPE_STALL_FLAG, __ipipe_root_status); +} + +static inline unsigned long __ipipe_test_and_stall_root(void) +{ + return test_and_set_bit(IPIPE_STALL_FLAG, __ipipe_root_status); +} + +#endif /* !CONFIG_SMP */ + +void __ipipe_unstall_root(void); + +void __ipipe_restore_root(unsigned long x); + +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT +void ipipe_check_context(struct ipipe_domain *border_ipd); +#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ +static inline void ipipe_check_context(struct ipipe_domain *border_ipd) { } +#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ + +#else /* !CONFIG_IPIPE */ +#define ipipe_check_context(ipd) do { } while(0) +#endif /* CONFIG_IPIPE */ + +#endif /* !__LINUX_IPIPE_BASE_H */ diff -Naur linux-2.6.21.5.orig/include/linux/ipipe.h linux-2.6.21.5/include/linux/ipipe.h --- linux-2.6.21.5.orig/include/linux/ipipe.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.21.5/include/linux/ipipe.h 2007-08-07 10:56:22.000000000 +0200 @@ -0,0 +1,692 @@ +/* -*- linux-c -*- + * include/linux/ipipe.h + * + * Copyright (C) 2002-2007 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef __LINUX_IPIPE_H +#define __LINUX_IPIPE_H + +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_IPIPE + +#define IPIPE_VERSION_STRING IPIPE_ARCH_STRING +#define IPIPE_RELEASE_NUMBER ((IPIPE_MAJOR_NUMBER << 16) | \ + (IPIPE_MINOR_NUMBER << 8) | \ + (IPIPE_PATCH_NUMBER)) + +#ifndef BROKEN_BUILTIN_RETURN_ADDRESS +#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_address(0)) +#define __BUILTIN_RETURN_ADDRESS1 ((unsigned long)__builtin_return_address(1)) +#endif /* !BROKEN_BUILTIN_RETURN_ADDRESS */ + +#define IPIPE_ROOT_PRIO 100 +#define IPIPE_ROOT_ID 0 +#define IPIPE_ROOT_NPTDKEYS 4 /* Must be <= BITS_PER_LONG */ + +#define IPIPE_RESET_TIMER 0x1 +#define IPIPE_GRAB_TIMER 0x2 + +/* Global domain flags */ +#define IPIPE_SPRINTK_FLAG 0 /* Synchronous printk() allowed */ +#define IPIPE_AHEAD_FLAG 1 /* Domain always heads the pipeline */ + +/* Interrupt control bits */ +#define IPIPE_HANDLE_FLAG 0 +#define IPIPE_PASS_FLAG 1 +#define IPIPE_ENABLE_FLAG 2 +#define IPIPE_DYNAMIC_FLAG IPIPE_HANDLE_FLAG +#define IPIPE_STICKY_FLAG 3 +#define IPIPE_SYSTEM_FLAG 4 +#define IPIPE_LOCK_FLAG 5 +#define IPIPE_WIRED_FLAG 6 +#define IPIPE_EXCLUSIVE_FLAG 7 + +#define IPIPE_HANDLE_MASK (1 << IPIPE_HANDLE_FLAG) +#define IPIPE_PASS_MASK (1 << IPIPE_PASS_FLAG) +#define IPIPE_ENABLE_MASK (1 << IPIPE_ENABLE_FLAG) +#define IPIPE_DYNAMIC_MASK IPIPE_HANDLE_MASK +#define IPIPE_STICKY_MASK (1 << IPIPE_STICKY_FLAG) +#define IPIPE_SYSTEM_MASK (1 << IPIPE_SYSTEM_FLAG) +#define IPIPE_LOCK_MASK (1 << IPIPE_LOCK_FLAG) +#define IPIPE_WIRED_MASK (1 << IPIPE_WIRED_FLAG) +#define IPIPE_EXCLUSIVE_MASK (1 << IPIPE_EXCLUSIVE_FLAG) + +#define IPIPE_DEFAULT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK) +#define IPIPE_STDROOT_MASK
(IPIPE_HANDLE_MASK|IPIPE_PASS_MASK|IPIPE_SYSTEM_MASK) + +#define IPIPE_EVENT_SELF 0x80000000 + +/* Number of virtual IRQs */ +#define IPIPE_NR_VIRQS BITS_PER_LONG +/* First virtual IRQ # */ +#define IPIPE_VIRQ_BASE (((IPIPE_NR_XIRQS + BITS_PER_LONG - 1) / BITS_PER_LONG) * BITS_PER_LONG) +/* Total number of IRQ slots */ +#define IPIPE_NR_IRQS (IPIPE_VIRQ_BASE + IPIPE_NR_VIRQS) +/* Number of indirect words needed to map the whole IRQ space. */ +#define IPIPE_IRQ_IWORDS ((IPIPE_NR_IRQS + BITS_PER_LONG - 1) / BITS_PER_LONG) +#define IPIPE_IRQ_IMASK (BITS_PER_LONG - 1) +#define IPIPE_IRQMASK_ANY (~0L) +#define IPIPE_IRQMASK_VIRT (IPIPE_IRQMASK_ANY << (IPIPE_VIRQ_BASE / BITS_PER_LONG)) + +#ifdef CONFIG_SMP + +#define IPIPE_NR_CPUS NR_CPUS +#define ipipe_declare_cpuid int cpuid +#define ipipe_load_cpuid() do { \ + cpuid = ipipe_processor_id(); \ + } while(0) +#define ipipe_lock_cpu(flags) do { \ + local_irq_save_hw(flags); \ + cpuid = ipipe_processor_id(); \ + } while(0) +#define ipipe_unlock_cpu(flags) local_irq_restore_hw(flags) +#define ipipe_get_cpu(flags) ipipe_lock_cpu(flags) +#define ipipe_put_cpu(flags) ipipe_unlock_cpu(flags) +#define ipipe_current_domain per_cpu(ipipe_percpu_domain, ipipe_processor_id()) + +#else /* !CONFIG_SMP */ + +#define IPIPE_NR_CPUS 1 +#define ipipe_declare_cpuid const int cpuid = 0 +#define ipipe_load_cpuid() do { } while(0) +#define ipipe_lock_cpu(flags) local_irq_save_hw(flags) +#define ipipe_unlock_cpu(flags) local_irq_restore_hw(flags) +#define ipipe_get_cpu(flags) do { (void)(flags); } while(0) +#define ipipe_put_cpu(flags) do { } while(0) +#define ipipe_current_domain per_cpu(ipipe_percpu_domain, 0) + +#endif /* CONFIG_SMP */ + +#define ipipe_virtual_irq_p(irq) ((irq) >= IPIPE_VIRQ_BASE && \ + (irq) < IPIPE_NR_IRQS) + +typedef void (*ipipe_irq_handler_t)(unsigned irq, + void *cookie); + +#define IPIPE_SAME_HANDLER ((ipipe_irq_handler_t)(-1)) + +typedef int (*ipipe_irq_ackfn_t)(unsigned irq); + +typedef int 
(*ipipe_event_handler_t)(unsigned event, + struct ipipe_domain *from, + void *data); +struct ipipe_domain { + + struct ipcpudata { + unsigned long status; /* Must be first in ipipe_domain */ + unsigned long irq_pending_hi; + unsigned long irq_pending_lo[IPIPE_IRQ_IWORDS]; + struct ipirqcnt { + unsigned long pending_hits; + unsigned long total_hits; + } irq_counters[IPIPE_NR_IRQS]; + unsigned long long evsync; + } ____cacheline_aligned_in_smp cpudata[IPIPE_NR_CPUS]; + + struct { + unsigned long control; + ipipe_irq_ackfn_t acknowledge; + ipipe_irq_handler_t handler; + void *cookie; + } ____cacheline_aligned irqs[IPIPE_NR_IRQS]; + + struct list_head p_link; /* Link in pipeline */ + ipipe_event_handler_t evhand[IPIPE_NR_EVENTS]; /* Event handlers. */ + unsigned long long evself; /* Self-monitored event bits. */ + unsigned long flags; + unsigned domid; + const char *name; + int priority; + void *pdd; + struct mutex mutex; +}; + +#define IPIPE_HEAD_PRIORITY (-1) /* For domains always heading the pipeline */ + +struct ipipe_domain_attr { + + unsigned domid; /* Domain identifier -- Magic value set by caller */ + const char *name; /* Domain name -- Warning: won't be dup'ed! */ + int priority; /* Priority in interrupt pipeline */ + void (*entry) (void); /* Domain entry point */ + void *pdd; /* Per-domain (opaque) data pointer */ +}; + +/* The following macros must be used hw interrupts off. 
*/ + +#define __ipipe_irq_cookie(ipd,irq) (ipd)->irqs[irq].cookie +#define __ipipe_irq_handler(ipd,irq) (ipd)->irqs[irq].handler + +#define __ipipe_cpudata_irq_hits(ipd,cpuid,irq) ((ipd)->cpudata[cpuid].irq_counters[irq].total_hits) + +#define __ipipe_set_irq_bit(ipd,cpuid,irq) \ +do { \ + if (!test_bit(IPIPE_LOCK_FLAG,&(ipd)->irqs[irq].control)) { \ + __set_bit(irq & IPIPE_IRQ_IMASK,&(ipd)->cpudata[cpuid].irq_pending_lo[irq >> IPIPE_IRQ_ISHIFT]); \ + __set_bit(irq >> IPIPE_IRQ_ISHIFT,&(ipd)->cpudata[cpuid].irq_pending_hi); \ + } \ +} while(0) + +#define __ipipe_clear_pend(ipd,cpuid,irq) \ +do { \ + __clear_bit(irq & IPIPE_IRQ_IMASK,&(ipd)->cpudata[cpuid].irq_pending_lo[irq >> IPIPE_IRQ_ISHIFT]); \ + if ((ipd)->cpudata[cpuid].irq_pending_lo[irq >> IPIPE_IRQ_ISHIFT] == 0) \ + __clear_bit(irq >> IPIPE_IRQ_ISHIFT,&(ipd)->cpudata[cpuid].irq_pending_hi); \ +} while(0) + +#define __ipipe_lock_irq(ipd,cpuid,irq) \ +do { \ + if (!test_and_set_bit(IPIPE_LOCK_FLAG,&(ipd)->irqs[irq].control)) \ + __ipipe_clear_pend(ipd,cpuid,irq); \ +} while(0) + +#define __ipipe_unlock_irq(ipd,irq) \ +do { \ + int __cpuid, __nr_cpus = num_online_cpus(); \ + if (test_and_clear_bit(IPIPE_LOCK_FLAG,&(ipd)->irqs[irq].control)) \ + for (__cpuid = 0; __cpuid < __nr_cpus; __cpuid++) \ + if ((ipd)->cpudata[__cpuid].irq_counters[irq].pending_hits > 0) { /* We need atomic ops next. 
*/ \ + set_bit(irq & IPIPE_IRQ_IMASK,&(ipd)->cpudata[__cpuid].irq_pending_lo[irq >> IPIPE_IRQ_ISHIFT]); \ + set_bit(irq >> IPIPE_IRQ_ISHIFT,&(ipd)->cpudata[__cpuid].irq_pending_hi); \ + } \ +} while(0) + +#define __ipipe_clear_irq(ipd,irq) \ +do { \ + int __cpuid, __nr_cpus = num_online_cpus(); \ + clear_bit(IPIPE_LOCK_FLAG,&(ipd)->irqs[irq].control); \ + for (__cpuid = 0; __cpuid < __nr_cpus; __cpuid++) { \ + (ipd)->cpudata[__cpuid].irq_counters[irq].pending_hits = 0; \ + __ipipe_clear_pend(ipd,__cpuid,irq); \ + } \ +} while(0) + +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +#define write_lock_hw(x) __raw_write_lock(&(x)->raw_lock) +#define write_trylock_hw(x) __raw_write_trylock(&(x)->raw_lock) +#define write_unlock_hw(x) __raw_write_unlock(&(x)->raw_lock) +#define read_lock_hw(x) __raw_read_lock(&(x)->raw_lock) +#define read_trylock_hw(x) __raw_read_trylock(&(x)->raw_lock) +#define read_unlock_hw(x) __raw_read_unlock(&(x)->raw_lock) +#else /* UP non-debug */ +#define write_lock_hw(lock) do { (void)(lock); } while (0) +#define write_trylock_hw(lock) ({ (void)(lock); 1; }) +#define write_unlock_hw(lock) do { (void)(lock); } while (0) +#define read_lock_hw(lock) do { (void)(lock); } while (0) +#define read_trylock_hw(lock) ({ (void)(lock); 1; }) +#define read_unlock_hw(lock) do { (void)(lock); } while (0) +#endif /* CONFIG_SMP || CONFIG_DEBUG_SPINLOCK */ + +typedef rwlock_t ipipe_rwlock_t; +#define IPIPE_RW_LOCK_UNLOCKED RW_LOCK_UNLOCKED + +#define read_lock_irqsave_hw(lock, flags) \ +do { \ + local_irq_save_hw(flags); \ + read_lock_hw(lock); \ +} while (0) + +#define read_unlock_irqrestore_hw(lock, flags) \ +do { \ + read_unlock_hw(lock); \ + local_irq_restore_hw(flags); \ +} while (0) + +#define write_lock_irqsave_hw(lock, flags) \ +do { \ + local_irq_save_hw(flags); \ + write_lock_hw(lock); \ +} while (0) + +#define write_unlock_irqrestore_hw(lock, flags) \ +do { \ + write_unlock_hw(lock); \ + local_irq_restore_hw(flags); \ +} while (0) + 
+DECLARE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain); + +extern unsigned __ipipe_printk_virq; + +extern unsigned long __ipipe_virtual_irq_map; + +extern struct list_head __ipipe_pipeline; + +extern int __ipipe_event_monitors[]; + +/* Private interface */ + +void ipipe_init(void); + +#ifdef CONFIG_PROC_FS +void ipipe_init_proc(void); + +#ifdef CONFIG_IPIPE_TRACE +void __ipipe_init_tracer(void); +#else /* !CONFIG_IPIPE_TRACE */ +#define __ipipe_init_tracer() do { } while(0) +#endif /* CONFIG_IPIPE_TRACE */ + +#else /* !CONFIG_PROC_FS */ +#define ipipe_init_proc() do { } while(0) +#endif /* CONFIG_PROC_FS */ + +void __ipipe_init_stage(struct ipipe_domain *ipd); + +void __ipipe_cleanup_domain(struct ipipe_domain *ipd); + +void __ipipe_add_domain_proc(struct ipipe_domain *ipd); + +void __ipipe_remove_domain_proc(struct ipipe_domain *ipd); + +void __ipipe_flush_printk(unsigned irq, void *cookie); + +void fastcall __ipipe_walk_pipeline(struct list_head *pos, int cpuid); + +int fastcall __ipipe_schedule_irq(unsigned irq, struct list_head *head); + +int fastcall __ipipe_dispatch_event(unsigned event, void *data); + +int fastcall __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq); + +void fastcall __ipipe_sync_stage(unsigned long syncmask); + +void __ipipe_pin_range_globally(unsigned long start, unsigned long end); + +struct mm_struct; + +int __ipipe_pin_range_mapping(struct mm_struct *mm, + unsigned long start, unsigned long end); + +#ifndef __ipipe_sync_pipeline +#define __ipipe_sync_pipeline(syncmask) __ipipe_sync_stage(syncmask) +#endif + +#ifndef __ipipe_run_irqtail +#define __ipipe_run_irqtail() do { } while(0) +#endif + +#define __ipipe_pipeline_head_p(ipd) (&(ipd)->p_link == __ipipe_pipeline.next) + +/* + * Keep the following as a macro, so that client code could check for + * the support of the invariant pipeline head optimization. 
+ */ +#define __ipipe_pipeline_head() list_entry(__ipipe_pipeline.next,struct ipipe_domain,p_link) + +#define __ipipe_event_monitored_p(ev) \ + (__ipipe_event_monitors[ev] > 0 || (ipipe_current_domain->evself & (1LL << ev))) + +#ifdef CONFIG_SMP + +cpumask_t __ipipe_set_irq_affinity(unsigned irq, + cpumask_t cpumask); + +int fastcall __ipipe_send_ipi(unsigned ipi, + cpumask_t cpumask); + +#endif /* CONFIG_SMP */ + +/* Called with hw interrupts off. */ +static inline void __ipipe_switch_to(struct ipipe_domain *out, + struct ipipe_domain *in, int cpuid) +{ + void ipipe_suspend_domain(void); + + /* + * "in" is guaranteed to be closer than "out" from the head of the + * pipeline (and obviously different). + */ + + out->cpudata[cpuid].evsync = 0; + per_cpu(ipipe_percpu_domain, cpuid) = in; + + ipipe_suspend_domain(); /* Sync stage and propagate interrupts. */ + ipipe_load_cpuid(); /* Processor might have changed. */ + + if (per_cpu(ipipe_percpu_domain, cpuid) == in) + /* + * Otherwise, something has changed the current domain under + * our feet recycling the register set; do not override. 
+ */ + per_cpu(ipipe_percpu_domain, cpuid) = out; +} + +#define ipipe_sigwake_notify(p) \ +do { \ + if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_SIGWAKE)) \ + __ipipe_dispatch_event(IPIPE_EVENT_SIGWAKE,p); \ +} while(0) + +#define ipipe_exit_notify(p) \ +do { \ + if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_EXIT)) \ + __ipipe_dispatch_event(IPIPE_EVENT_EXIT,p); \ +} while(0) + +#define ipipe_setsched_notify(p) \ +do { \ + if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_SETSCHED)) \ + __ipipe_dispatch_event(IPIPE_EVENT_SETSCHED,p); \ +} while(0) + +#define ipipe_schedule_notify(prev, next) \ +do { \ + if ((((prev)->flags|(next)->flags) & PF_EVNOTIFY) && \ + __ipipe_event_monitored_p(IPIPE_EVENT_SCHEDULE)) \ + __ipipe_dispatch_event(IPIPE_EVENT_SCHEDULE,next); \ +} while(0) + +#define ipipe_trap_notify(ex, regs) \ +({ \ + ipipe_declare_cpuid; \ + int ret = 0; \ + ipipe_load_cpuid(); \ + if ((test_bit(IPIPE_NOSTACK_FLAG, &ipipe_current_domain->cpudata[cpuid].status) || \ + ((current)->flags & PF_EVNOTIFY)) && \ + __ipipe_event_monitored_p(ex)) \ + ret = __ipipe_dispatch_event(ex, regs); \ + ret; \ +}) + +static inline void ipipe_init_notify(struct task_struct *p) +{ + if (__ipipe_event_monitored_p(IPIPE_EVENT_INIT)) + __ipipe_dispatch_event(IPIPE_EVENT_INIT,p); +} + +static inline void ipipe_cleanup_notify(struct mm_struct *mm) +{ + if (__ipipe_event_monitored_p(IPIPE_EVENT_CLEANUP)) + __ipipe_dispatch_event(IPIPE_EVENT_CLEANUP,mm); +} + +/* Public interface */ + +int ipipe_register_domain(struct ipipe_domain *ipd, + struct ipipe_domain_attr *attr); + +int ipipe_unregister_domain(struct ipipe_domain *ipd); + +void ipipe_suspend_domain(void); + +int ipipe_virtualize_irq(struct ipipe_domain *ipd, + unsigned irq, + ipipe_irq_handler_t handler, + void *cookie, + ipipe_irq_ackfn_t acknowledge, + unsigned modemask); + +int ipipe_control_irq(unsigned irq, + unsigned clrmask, + unsigned setmask); + 
+unsigned ipipe_alloc_virq(void); + +int ipipe_free_virq(unsigned virq); + +int fastcall ipipe_trigger_irq(unsigned irq); + +static inline int ipipe_propagate_irq(unsigned irq) +{ + return __ipipe_schedule_irq(irq, ipipe_current_domain->p_link.next); +} + +static inline int ipipe_schedule_irq(unsigned irq) +{ + return __ipipe_schedule_irq(irq, &ipipe_current_domain->p_link); +} + +void fastcall ipipe_stall_pipeline_from(struct ipipe_domain *ipd); + +unsigned long fastcall ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd); + +void fastcall ipipe_unstall_pipeline_from(struct ipipe_domain *ipd); + +unsigned long fastcall ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd); + +void fastcall ipipe_restore_pipeline_from(struct ipipe_domain *ipd, + unsigned long x); + +static inline unsigned long ipipe_test_pipeline_from(struct ipipe_domain *ipd) +{ + unsigned long flags, x; + ipipe_declare_cpuid; + + ipipe_get_cpu(flags); + x = test_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + ipipe_put_cpu(flags); + + return x; +} + +static inline void ipipe_restore_pipeline_nosync(struct ipipe_domain *ipd, + unsigned long x, int cpuid) +{ + /* + * If cpuid is current, then it must be held on entry + * (ipipe_get_cpu/local_irq_save_hw/local_irq_disable_hw). 
+ */ + + if (x) + __set_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + else + __clear_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); +} + +static inline void ipipe_stall_pipeline_head(void) +{ + ipipe_declare_cpuid; + + local_irq_disable_hw(); + ipipe_load_cpuid(); + __set_bit(IPIPE_STALL_FLAG, &__ipipe_pipeline_head()->cpudata[cpuid].status); +} + +static inline unsigned long ipipe_test_and_stall_pipeline_head(void) +{ + ipipe_declare_cpuid; + + local_irq_disable_hw(); + ipipe_load_cpuid(); + return __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_pipeline_head()->cpudata[cpuid].status); +} + +void ipipe_unstall_pipeline_head(void); + +void fastcall __ipipe_restore_pipeline_head(struct ipipe_domain *head, + unsigned long x); + +static inline void ipipe_restore_pipeline_head(unsigned long x) +{ + struct ipipe_domain *head = __ipipe_pipeline_head(); + /* On some archs, __test_and_set_bit() might return different + * truth value than test_bit(), so we test the exclusive OR of + * both statuses, assuming that the lowest bit is always set in + * the truth value (if this is wrong, the failed optimization will + * be caught in __ipipe_restore_pipeline_head() if + * CONFIG_DEBUG_KERNEL is set). 
*/ + if ((x ^ test_bit(IPIPE_STALL_FLAG, &head->cpudata[ipipe_processor_id()].status)) & 1) + __ipipe_restore_pipeline_head(head,x); +} + +#define ipipe_unstall_pipeline() \ + ipipe_unstall_pipeline_from(ipipe_current_domain) + +#define ipipe_test_and_unstall_pipeline() \ + ipipe_test_and_unstall_pipeline_from(ipipe_current_domain) + +#define ipipe_test_pipeline() \ + ipipe_test_pipeline_from(ipipe_current_domain) + +#define ipipe_test_and_stall_pipeline() \ + ipipe_test_and_stall_pipeline_from(ipipe_current_domain) + +#define ipipe_stall_pipeline() \ + ipipe_stall_pipeline_from(ipipe_current_domain) + +#define ipipe_restore_pipeline(x) \ + ipipe_restore_pipeline_from(ipipe_current_domain, (x)) + +void ipipe_init_attr(struct ipipe_domain_attr *attr); + +int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo); + +int ipipe_tune_timer(unsigned long ns, + int flags); + +unsigned long ipipe_critical_enter(void (*syncfn) (void)); + +void ipipe_critical_exit(unsigned long flags); + +static inline void ipipe_set_printk_sync(struct ipipe_domain *ipd) +{ + set_bit(IPIPE_SPRINTK_FLAG, &ipd->flags); +} + +static inline void ipipe_set_printk_async(struct ipipe_domain *ipd) +{ + clear_bit(IPIPE_SPRINTK_FLAG, &ipd->flags); +} + +static inline void ipipe_set_foreign_stack(struct ipipe_domain *ipd) +{ + /* Must be called hw interrupts off. */ + ipipe_declare_cpuid; + ipipe_load_cpuid(); + __set_bit(IPIPE_NOSTACK_FLAG, &ipd->cpudata[cpuid].status); +} + +static inline void ipipe_clear_foreign_stack(struct ipipe_domain *ipd) +{ + /* Must be called hw interrupts off. */ + ipipe_declare_cpuid; + ipipe_load_cpuid(); + __clear_bit(IPIPE_NOSTACK_FLAG, &ipd->cpudata[cpuid].status); +} + +#define ipipe_safe_current() \ +({ \ + ipipe_declare_cpuid; \ + struct task_struct *p; \ + ipipe_load_cpuid(); \ + p = test_bit(IPIPE_NOSTACK_FLAG, \ + &per_cpu(ipipe_percpu_domain, cpuid)->cpudata[cpuid].status) ? 
&init_task : current; \ + p; \ +}) + +ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd, + unsigned event, + ipipe_event_handler_t handler); + +cpumask_t ipipe_set_irq_affinity(unsigned irq, + cpumask_t cpumask); + +int fastcall ipipe_send_ipi(unsigned ipi, + cpumask_t cpumask); + +int ipipe_setscheduler_root(struct task_struct *p, + int policy, + int prio); + +int ipipe_reenter_root(struct task_struct *prev, + int policy, + int prio); + +int ipipe_alloc_ptdkey(void); + +int ipipe_free_ptdkey(int key); + +int fastcall ipipe_set_ptd(int key, + void *value); + +void fastcall *ipipe_get_ptd(int key); + +int ipipe_disable_ondemand_mappings(struct task_struct *tsk); + +#define local_irq_enable_hw_cond() local_irq_enable_hw() +#define local_irq_disable_hw_cond() local_irq_disable_hw() +#define local_irq_save_hw_cond(flags) local_irq_save_hw(flags) +#define local_irq_restore_hw_cond(flags) local_irq_restore_hw(flags) +#define local_irq_disable_head() ipipe_stall_pipeline_head() + +#define local_irq_enable_nohead(ipd) \ + do { \ + if (!__ipipe_pipeline_head_p(ipd)) \ + local_irq_enable_hw(); \ + } while(0) + +#define local_irq_disable_nohead(ipd) \ + do { \ + if (!__ipipe_pipeline_head_p(ipd)) \ + local_irq_disable_hw(); \ + } while(0) + +#define ipipe_irq_lock(irq) \ + do { \ + ipipe_declare_cpuid; \ + ipipe_load_cpuid(); \ + __ipipe_lock_irq(per_cpu(ipipe_percpu_domain, cpuid), cpuid, irq);\ + } while(0) + +#define ipipe_irq_unlock(irq) \ + do { \ + ipipe_declare_cpuid; \ + ipipe_load_cpuid(); \ + __ipipe_unlock_irq(per_cpu(ipipe_percpu_domain, cpuid), irq); \ + } while(0) + +#define ipipe_root_domain_p (ipipe_current_domain == ipipe_root_domain) + +#else /* !CONFIG_IPIPE */ + +#define ipipe_init() do { } while(0) +#define ipipe_suspend_domain() do { } while(0) +#define ipipe_sigwake_notify(p) do { } while(0) +#define ipipe_setsched_notify(p) do { } while(0) +#define ipipe_init_notify(p) do { } while(0) +#define ipipe_exit_notify(p) do { } while(0) 
+#define ipipe_cleanup_notify(mm) do { } while(0) +#define ipipe_trap_notify(t,r) 0 +#define ipipe_init_proc() do { } while(0) +#define __ipipe_pin_range_globally(start, end) do { } while(0) + +#define local_irq_enable_hw_cond() do { } while(0) +#define local_irq_disable_hw_cond() do { } while(0) +#define local_irq_save_hw_cond(flags) do { (void)(flags); } while(0) +#define local_irq_restore_hw_cond(flags) do { } while(0) + +#define ipipe_irq_lock(irq) do { } while(0) +#define ipipe_irq_unlock(irq) do { } while(0) + +#define ipipe_root_domain_p 1 +#define ipipe_safe_current current + +#define local_irq_disable_head() local_irq_disable() + +#endif /* CONFIG_IPIPE */ + +#endif /* !__LINUX_IPIPE_H */ diff -Naur linux-2.6.21.5.orig/include/linux/ipipe_trace.h linux-2.6.21.5/include/linux/ipipe_trace.h --- linux-2.6.21.5.orig/include/linux/ipipe_trace.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.21.5/include/linux/ipipe_trace.h 2007-08-07 10:56:22.000000000 +0200 @@ -0,0 +1,49 @@ +/* -*- linux-c -*- + * include/linux/ipipe_trace.h + * + * Copyright (C) 2005 Luotao Fu. + * 2005-2007 Jan Kiszka. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef _LINUX_IPIPE_TRACE_H +#define _LINUX_IPIPE_TRACE_H + +#include + +void ipipe_trace_begin(unsigned long v); +void ipipe_trace_end(unsigned long v); +void ipipe_trace_freeze(unsigned long v); +void ipipe_trace_special(unsigned char special_id, unsigned long v); +void ipipe_trace_pid(pid_t pid, short prio); + +int ipipe_trace_max_reset(void); +int ipipe_trace_frozen_reset(void); + +#ifdef CONFIG_IPIPE_TRACE_PANIC + +void ipipe_trace_panic_freeze(void); +void ipipe_trace_panic_dump(void); + +#else /* !CONFIG_IPIPE_TRACE_PANIC */ + +static inline void ipipe_trace_panic_freeze(void) { } +static inline void ipipe_trace_panic_dump(void) { } + +#endif /* !CONFIG_IPIPE_TRACE_PANIC */ + +#endif /* !_LINUX_IPIPE_TRACE_H */ diff -Naur linux-2.6.21.5.orig/include/linux/irq.h linux-2.6.21.5/include/linux/irq.h --- linux-2.6.21.5.orig/include/linux/irq.h 2007-08-07 10:55:37.000000000 +0200 +++ linux-2.6.21.5/include/linux/irq.h 2007-08-07 10:56:22.000000000 +0200 @@ -151,6 +151,12 @@ * Pad this out to 32 bytes for cache and indexing reasons.
*/ struct irq_desc { +#ifdef CONFIG_IPIPE + void fastcall (*ipipe_ack)(unsigned int irq, + struct irq_desc *desc); + void fastcall (*ipipe_end)(unsigned int irq, + struct irq_desc *desc); +#endif /* CONFIG_IPIPE */ irq_flow_handler_t handle_irq; struct irq_chip *chip; struct msi_desc *msi_desc; diff -Naur linux-2.6.21.5.orig/include/linux/kernel.h linux-2.6.21.5/include/linux/kernel.h --- linux-2.6.21.5.orig/include/linux/kernel.h 2007-08-07 10:55:37.000000000 +0200 +++ linux-2.6.21.5/include/linux/kernel.h 2007-08-07 10:56:22.000000000 +0200 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -72,9 +73,12 @@ */ #ifdef CONFIG_PREEMPT_VOLUNTARY extern int cond_resched(void); -# define might_resched() cond_resched() +# define might_resched() do { \ + ipipe_check_context(ipipe_root_domain); \ + cond_resched(); \ + } while (0) #else -# define might_resched() do { } while (0) +# define might_resched() ipipe_check_context(ipipe_root_domain) #endif #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP diff -Naur linux-2.6.21.5.orig/include/linux/linkage.h linux-2.6.21.5/include/linux/linkage.h --- linux-2.6.21.5.orig/include/linux/linkage.h 2007-08-07 10:55:37.000000000 +0200 +++ linux-2.6.21.5/include/linux/linkage.h 2007-08-07 10:56:22.000000000 +0200 @@ -64,4 +64,8 @@ #define fastcall #endif +#ifndef notrace +#define notrace __attribute__((no_instrument_function)) +#endif + #endif diff -Naur linux-2.6.21.5.orig/include/linux/mm.h linux-2.6.21.5/include/linux/mm.h --- linux-2.6.21.5.orig/include/linux/mm.h 2007-08-07 10:55:37.000000000 +0200 +++ linux-2.6.21.5/include/linux/mm.h 2007-08-07 10:56:22.000000000 +0200 @@ -169,6 +169,7 @@ #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ +#define VM_PINNED 0x08000000 /* Disable faults for the vma */ #ifndef VM_STACK_DEFAULT_FLAGS /* arch can 
override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS diff -Naur linux-2.6.21.5.orig/include/linux/preempt.h linux-2.6.21.5/include/linux/preempt.h --- linux-2.6.21.5.orig/include/linux/preempt.h 2007-08-07 10:55:38.000000000 +0200 +++ linux-2.6.21.5/include/linux/preempt.h 2007-08-07 10:56:22.000000000 +0200 @@ -8,6 +8,7 @@ #include #include +#include #ifdef CONFIG_DEBUG_PREEMPT extern void fastcall add_preempt_count(int val); @@ -28,18 +29,21 @@ #define preempt_disable() \ do { \ + ipipe_check_context(ipipe_root_domain); \ inc_preempt_count(); \ barrier(); \ } while (0) #define preempt_enable_no_resched() \ do { \ + ipipe_check_context(ipipe_root_domain); \ barrier(); \ dec_preempt_count(); \ } while (0) #define preempt_check_resched() \ do { \ + ipipe_check_context(ipipe_root_domain); \ if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \ preempt_schedule(); \ } while (0) @@ -53,10 +57,10 @@ #else -#define preempt_disable() do { } while (0) -#define preempt_enable_no_resched() do { } while (0) -#define preempt_enable() do { } while (0) -#define preempt_check_resched() do { } while (0) +#define preempt_disable() ipipe_check_context(ipipe_root_domain) +#define preempt_enable_no_resched() ipipe_check_context(ipipe_root_domain) +#define preempt_enable() ipipe_check_context(ipipe_root_domain) +#define preempt_check_resched() ipipe_check_context(ipipe_root_domain) #endif diff -Naur linux-2.6.21.5.orig/include/linux/sched.h linux-2.6.21.5/include/linux/sched.h --- linux-2.6.21.5.orig/include/linux/sched.h 2007-08-07 10:55:38.000000000 +0200 +++ linux-2.6.21.5/include/linux/sched.h 2007-08-07 10:56:22.000000000 +0200 @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -151,6 +152,13 @@ /* in tsk->state again */ #define TASK_NONINTERACTIVE 64 #define TASK_DEAD 128 +#ifdef CONFIG_IPIPE +#define TASK_ATOMICSWITCH 512 +#define TASK_NOWAKEUP 1024 +#else /* !CONFIG_IPIPE */ +#define TASK_ATOMICSWITCH 0 +#define TASK_NOWAKEUP 0 +#endif /* 
CONFIG_IPIPE */ #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -1041,6 +1049,9 @@ atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; +#ifdef CONFIG_IPIPE + void *ptd[IPIPE_ROOT_NPTDKEYS]; +#endif /* * cache last used pipe for splice @@ -1158,6 +1169,11 @@ #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ +#ifdef CONFIG_IPIPE +#define PF_EVNOTIFY 0x40000000 /* Notify other domains about internal events */ +#else +#define PF_EVNOTIFY 0 +#endif /* CONFIG_IPIPE */ /* * Only the _current_ task can read/write to tsk->flags, but other diff -Naur linux-2.6.21.5.orig/include/linux/smp.h linux-2.6.21.5/include/linux/smp.h --- linux-2.6.21.5.orig/include/linux/smp.h 2007-08-07 10:55:38.000000000 +0200 +++ linux-2.6.21.5/include/linux/smp.h 2007-08-07 10:56:22.000000000 +0200 @@ -109,6 +109,8 @@ #endif /* !SMP */ +#include + /* * smp_processor_id(): get the current CPU ID. 
* diff -Naur linux-2.6.21.5.orig/include/linux/spinlock.h linux-2.6.21.5/include/linux/spinlock.h --- linux-2.6.21.5.orig/include/linux/spinlock.h 2007-08-07 10:55:38.000000000 +0200 +++ linux-2.6.21.5/include/linux/spinlock.h 2007-08-07 11:10:53.000000000 +0200 @@ -172,7 +172,90 @@ #define read_trylock(lock) __cond_lock(lock, _read_trylock(lock)) #define write_trylock(lock) __cond_lock(lock, _write_trylock(lock)) -#define spin_lock(lock) _spin_lock(lock) +#undef TYPE_EQUAL +#define TYPE_EQUAL(lock, type) \ + __builtin_types_compatible_p(typeof(lock), type *) + +#define PICK_SPINOP(op, lock) \ +do { \ + if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) \ + __raw_spin##op(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \ + else if (TYPE_EQUAL(lock, spinlock_t)) \ + _spin##op((spinlock_t *)(lock)); \ +} while (0) + +#define PICK_SPINOP_RAW(op, lock) \ +do { \ + if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) \ + __raw_spin##op(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \ + else if (TYPE_EQUAL(lock, spinlock_t)) \ + __raw_spin##op(&((spinlock_t *)(lock))->raw_lock); \ +} while (0) + +#define PICK_SPINLOCK_IRQ(lock) \ +do { \ + if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \ + __ipipe_spin_lock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \ + } else if (TYPE_EQUAL(lock, spinlock_t)) \ + _spin_lock_irq((spinlock_t *)(lock)); \ +} while (0) + +#define PICK_SPINUNLOCK_IRQ(lock) \ +do { \ + if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \ + __ipipe_spin_unlock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \ + } else if (TYPE_EQUAL(lock, spinlock_t)) \ + _spin_unlock_irq((spinlock_t *)(lock)); \ +} while (0) + +#define PICK_SPINLOCK_IRQ_RAW(lock) \ +do { \ + if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \ + __ipipe_spin_lock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \ + } else if (TYPE_EQUAL(lock, spinlock_t)) { \ + local_irq_disable(); \ + __raw_spin_lock(&((spinlock_t *)(lock))->raw_lock); } \ +} while (0) + +#define PICK_SPINUNLOCK_IRQ_RAW(lock) \ +do { \ + if
(TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \ + __ipipe_spin_unlock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \ + } else if (TYPE_EQUAL(lock, spinlock_t)) { \ + __raw_spin_unlock(&((spinlock_t *)(lock))->raw_lock); \ + local_irq_enable(); } \ +} while (0) + +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +extern int __bad_spinlock_type(void); + +#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ +do { \ + if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \ + (flags) = __ipipe_spin_lock_irqsave(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \ + } else if (TYPE_EQUAL(lock, spinlock_t)) \ + flags = _spin_lock_irqsave((spinlock_t *)(lock)); \ + else __bad_spinlock_type(); \ +} while (0) +#else +#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ +do { \ + if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \ + (flags) = __ipipe_spin_lock_irqsave(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \ + } else if (TYPE_EQUAL(lock, spinlock_t)) \ + _spin_lock_irqsave((spinlock_t *)(lock), flags); \ +} while (0) +#endif + +#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags) \ + do { \ + if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \ + __ipipe_spin_unlock_irqrestore(&((__ipipe_spinlock_t *)(lock))->__raw_lock, flags); \ + } else if (TYPE_EQUAL(lock, spinlock_t)) \ + _spin_unlock_irqrestore((spinlock_t *)(lock), flags); \ +} while (0) + +#define spin_lock(lock) PICK_SPINOP(_lock, lock) #ifdef CONFIG_DEBUG_LOCK_ALLOC # define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass) @@ -185,7 +268,7 @@ #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -#define spin_lock_irqsave(lock, flags) flags = _spin_lock_irqsave(lock) +#define spin_lock_irqsave(lock, flags) PICK_SPINLOCK_IRQSAVE(lock, flags) #define read_lock_irqsave(lock, flags) flags = _read_lock_irqsave(lock) #define write_lock_irqsave(lock, flags) flags = _write_lock_irqsave(lock) @@ -199,7 +282,7 @@ #else -#define spin_lock_irqsave(lock, flags) _spin_lock_irqsave(lock, flags) +#define spin_lock_irqsave(lock, flags)
PICK_SPINLOCK_IRQSAVE(lock, flags) #define read_lock_irqsave(lock, flags) _read_lock_irqsave(lock, flags) #define write_lock_irqsave(lock, flags) _write_lock_irqsave(lock, flags) #define spin_lock_irqsave_nested(lock, flags, subclass) \ @@ -207,7 +290,7 @@ #endif -#define spin_lock_irq(lock) _spin_lock_irq(lock) +#define spin_lock_irq(lock) PICK_SPINLOCK_IRQ(lock) #define spin_lock_bh(lock) _spin_lock_bh(lock) #define read_lock_irq(lock) _read_lock_irq(lock) @@ -221,22 +304,22 @@ */ #if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \ !defined(CONFIG_SMP) -# define spin_unlock(lock) _spin_unlock(lock) +# define spin_unlock(lock) PICK_SPINOP(_unlock, lock) # define read_unlock(lock) _read_unlock(lock) # define write_unlock(lock) _write_unlock(lock) -# define spin_unlock_irq(lock) _spin_unlock_irq(lock) +# define spin_unlock_irq(lock) PICK_SPINUNLOCK_IRQ(lock) # define read_unlock_irq(lock) _read_unlock_irq(lock) # define write_unlock_irq(lock) _write_unlock_irq(lock) #else # define spin_unlock(lock) \ - do {__raw_spin_unlock(&(lock)->raw_lock); __release(lock); } while (0) + do {PICK_SPINOP_RAW(_unlock, lock); __release(lock); } while (0) # define read_unlock(lock) \ do {__raw_read_unlock(&(lock)->raw_lock); __release(lock); } while (0) # define write_unlock(lock) \ do {__raw_write_unlock(&(lock)->raw_lock); __release(lock); } while (0) # define spin_unlock_irq(lock) \ do { \ - __raw_spin_unlock(&(lock)->raw_lock); \ + PICK_SPINUNLOCK_IRQ_RAW(lock); \ __release(lock); \ local_irq_enable(); \ } while (0) @@ -255,7 +338,7 @@ #endif #define spin_unlock_irqrestore(lock, flags) \ - _spin_unlock_irqrestore(lock, flags) + PICK_SPINUNLOCK_IRQRESTORE(lock, flags) #define spin_unlock_bh(lock) _spin_unlock_bh(lock) #define read_unlock_irqrestore(lock, flags) \ @@ -339,4 +422,29 @@ */ #define spin_can_lock(lock) (!spin_is_locked(lock)) +#ifdef CONFIG_IPIPE +void fastcall __ipipe_spin_lock_irq(raw_spinlock_t *lock); +void fastcall
__ipipe_spin_unlock_irq(raw_spinlock_t *lock); +unsigned long fastcall __ipipe_spin_lock_irqsave(raw_spinlock_t *lock); +void fastcall __ipipe_spin_unlock_irqrestore(raw_spinlock_t *lock, + unsigned long x); +void fastcall __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock); +void fastcall __ipipe_spin_unlock_irqcomplete(unsigned long x); +#define spin_lock_irqsave_cond(lock, flags) \ + spin_lock_irqsave(lock,flags) +#define spin_unlock_irqrestore_cond(lock, flags) \ + spin_unlock_irqrestore(lock,flags) +#else +#define spin_lock_irqsave_cond(lock, flags) \ + do { (void)(flags); spin_lock(lock); } while(0) +#define spin_unlock_irqrestore_cond(lock, flags) \ + spin_unlock(lock) +#define __ipipe_spin_lock_irq(lock) do { } while(0) +#define __ipipe_spin_unlock_irq(lock) do { } while(0) +#define __ipipe_spin_lock_irqsave(lock) 0 +#define __ipipe_spin_unlock_irqrestore(lock, x) do { (void)(x); } while(0) +#define __ipipe_spin_unlock_irqbegin(lock) do { } while(0) +#define __ipipe_spin_unlock_irqcomplete(x) do { (void)(x); } while(0) +#endif + #endif /* __LINUX_SPINLOCK_H */ diff -Naur linux-2.6.21.5.orig/include/linux/spinlock_types.h linux-2.6.21.5/include/linux/spinlock_types.h --- linux-2.6.21.5.orig/include/linux/spinlock_types.h 2007-08-07 10:55:38.000000000 +0200 +++ linux-2.6.21.5/include/linux/spinlock_types.h 2007-08-07 10:56:22.000000000 +0200 @@ -31,6 +31,10 @@ #endif } spinlock_t; +typedef struct { + raw_spinlock_t __raw_lock; +} __ipipe_spinlock_t; + #define SPINLOCK_MAGIC 0xdead4ead typedef struct { @@ -86,9 +90,19 @@ #endif #define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(old_style_spin_init) +#define IPIPE_SPIN_LOCK_UNLOCKED \ + (__ipipe_spinlock_t) { .__raw_lock = __RAW_SPIN_LOCK_UNLOCKED } #define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init) #define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) #define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x) +#ifdef CONFIG_IPIPE +# define ipipe_spinlock_t __ipipe_spinlock_t +# define 
IPIPE_DEFINE_SPINLOCK(x) ipipe_spinlock_t x = IPIPE_SPIN_LOCK_UNLOCKED +#else +# define ipipe_spinlock_t spinlock_t +# define IPIPE_DEFINE_SPINLOCK(x) DEFINE_SPINLOCK(x) +#endif + #endif /* __LINUX_SPINLOCK_TYPES_H */ diff -Naur linux-2.6.21.5.orig/init/Kconfig linux-2.6.21.5/init/Kconfig --- linux-2.6.21.5.orig/init/Kconfig 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/init/Kconfig 2007-08-07 10:56:22.000000000 +0200 @@ -67,6 +67,7 @@ config LOCALVERSION string "Local version - append to kernel release" + default "-ipipe" help Append an extra string to the end of your kernel version. This will show up when you type uname, for example. diff -Naur linux-2.6.21.5.orig/init/main.c linux-2.6.21.5/init/main.c --- linux-2.6.21.5.orig/init/main.c 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/init/main.c 2007-08-07 10:56:22.000000000 +0200 @@ -513,7 +513,7 @@ unwind_init(); lockdep_init(); - local_irq_disable(); + local_irq_disable_hw(); early_boot_irqs_off(); early_init_irq_lock_class(); @@ -566,6 +566,11 @@ softirq_init(); timekeeping_init(); time_init(); + /* + * We need to wait for the interrupt and time subsystems to be + * initialized before enabling the pipeline. 
+ */ + ipipe_init(); profile_init(); if (!irqs_disabled()) printk("start_kernel(): bug: interrupts were enabled early\n"); @@ -709,6 +714,7 @@ usermodehelper_init(); driver_init(); init_irq_proc(); + ipipe_init_proc(); do_initcalls(); } diff -Naur linux-2.6.21.5.orig/kernel/exit.c linux-2.6.21.5/kernel/exit.c --- linux-2.6.21.5.orig/kernel/exit.c 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/kernel/exit.c 2007-08-07 10:56:22.000000000 +0200 @@ -923,6 +923,7 @@ if (group_dead) acct_process(); + ipipe_exit_notify(tsk); exit_sem(tsk); __exit_files(tsk); __exit_fs(tsk); diff -Naur linux-2.6.21.5.orig/kernel/fork.c linux-2.6.21.5/kernel/fork.c --- linux-2.6.21.5.orig/kernel/fork.c 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/kernel/fork.c 2007-08-07 10:56:22.000000000 +0200 @@ -387,6 +387,7 @@ if (atomic_dec_and_test(&mm->mm_users)) { exit_aio(mm); exit_mmap(mm); + ipipe_cleanup_notify(mm); if (!list_empty(&mm->mmlist)) { spin_lock(&mmlist_lock); list_del(&mm->mmlist); @@ -917,7 +918,7 @@ { unsigned long new_flags = p->flags; - new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE); + new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE | PF_EVNOTIFY); new_flags |= PF_FORKNOEXEC; if (!(clone_flags & CLONE_PTRACE)) p->ptrace = 0; @@ -1264,6 +1265,14 @@ spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); +#ifdef CONFIG_IPIPE + { + int k; + + for (k = 0; k < IPIPE_ROOT_NPTDKEYS; k++) + p->ptd[k] = NULL; + } +#endif /* CONFIG_IPIPE */ return p; bad_fork_cleanup_namespaces: diff -Naur linux-2.6.21.5.orig/kernel/ipipe/core.c linux-2.6.21.5/kernel/ipipe/core.c --- linux-2.6.21.5.orig/kernel/ipipe/core.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.21.5/kernel/ipipe/core.c 2007-08-07 11:12:10.000000000 +0200 @@ -0,0 +1,1517 @@ +/* -*- linux-c -*- + * linux/kernel/ipipe/core.c + * + * Copyright (C) 2002-2005 Philippe Gerum.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Architecture-independent I-PIPE core support. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PROC_FS +#include +#include +#endif /* CONFIG_PROC_FS */ +#include + +static int __ipipe_ptd_key_count; + +static unsigned long __ipipe_ptd_key_map; + +struct ipipe_domain ipipe_root = + { .cpudata = {[0 ... IPIPE_NR_CPUS-1] = + { .status = (1<name = "Linux"; + ipd->domid = IPIPE_ROOT_ID; + ipd->priority = IPIPE_ROOT_PRIO; + + __ipipe_init_stage(ipd); + + INIT_LIST_HEAD(&ipd->p_link); + list_add_tail(&ipd->p_link, &__ipipe_pipeline); + + __ipipe_init_platform(); + +#ifdef CONFIG_PRINTK + __ipipe_printk_virq = ipipe_alloc_virq(); /* Cannot fail here. 
*/ + ipd->irqs[__ipipe_printk_virq].handler = &__ipipe_flush_printk; + ipd->irqs[__ipipe_printk_virq].cookie = NULL; + ipd->irqs[__ipipe_printk_virq].acknowledge = NULL; + ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK; +#endif /* CONFIG_PRINTK */ + + __ipipe_enable_pipeline(); + + printk(KERN_INFO "I-pipe %s: pipeline enabled.\n", + IPIPE_VERSION_STRING); +} + +void __ipipe_init_stage(struct ipipe_domain *ipd) +{ + int cpuid, n; + + for (cpuid = 0; cpuid < IPIPE_NR_CPUS; cpuid++) { + ipd->cpudata[cpuid].irq_pending_hi = 0; + + for (n = 0; n < IPIPE_IRQ_IWORDS; n++) + ipd->cpudata[cpuid].irq_pending_lo[n] = 0; + + for (n = 0; n < IPIPE_NR_IRQS; n++) { + ipd->cpudata[cpuid].irq_counters[n].total_hits = 0; + ipd->cpudata[cpuid].irq_counters[n].pending_hits = 0; + } + + ipd->cpudata[cpuid].evsync = 0; + } + + for (n = 0; n < IPIPE_NR_IRQS; n++) { + ipd->irqs[n].acknowledge = NULL; + ipd->irqs[n].handler = NULL; + ipd->irqs[n].control = IPIPE_PASS_MASK; /* Pass but don't handle */ + } + + for (n = 0; n < IPIPE_NR_EVENTS; n++) + ipd->evhand[n] = NULL; + + ipd->evself = 0LL; + mutex_init(&ipd->mutex); + + __ipipe_hook_critical_ipi(ipd); +} + +void __ipipe_cleanup_domain(struct ipipe_domain *ipd) +{ + ipipe_unstall_pipeline_from(ipd); + +#ifdef CONFIG_SMP + { + int cpu; + + for_each_online_cpu(cpu) { + while (ipd->cpudata[cpu].irq_pending_hi != 0) + cpu_relax(); + } + } +#endif /* CONFIG_SMP */ +} + +#ifdef CONFIG_SMP +void __ipipe_stall_root(void) +{ + ipipe_declare_cpuid; + unsigned long flags; + + ipipe_get_cpu(flags); /* Care for migration. */ + set_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status); + ipipe_put_cpu(flags); +} + +unsigned long __ipipe_test_root(void) +{ + unsigned long flags, x; + ipipe_declare_cpuid; + + ipipe_get_cpu(flags); /* Care for migration. 
*/ + x = test_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status); + ipipe_put_cpu(flags); + + return x; +} + +unsigned long __ipipe_test_and_stall_root(void) +{ + unsigned long flags, x; + ipipe_declare_cpuid; + + ipipe_get_cpu(flags); /* Care for migration. */ + x = test_and_set_bit(IPIPE_STALL_FLAG, + &ipipe_root_domain->cpudata[cpuid].status); + ipipe_put_cpu(flags); + + return x; +} +#endif /* CONFIG_SMP */ + +void __ipipe_unstall_root(void) +{ + ipipe_declare_cpuid; + + local_irq_disable_hw(); + + ipipe_load_cpuid(); + + __clear_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status); + + if (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi != 0)) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + + local_irq_enable_hw(); +} + +void __ipipe_restore_root(unsigned long x) +{ + if (x) + __ipipe_stall_root(); + else + __ipipe_unstall_root(); +} + +void fastcall ipipe_stall_pipeline_from(struct ipipe_domain *ipd) +{ + ipipe_declare_cpuid; +#ifdef CONFIG_SMP + unsigned long flags; + + ipipe_lock_cpu(flags); /* Care for migration. */ + + __set_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + + if (!__ipipe_pipeline_head_p(ipd)) + ipipe_unlock_cpu(flags); +#else /* CONFIG_SMP */ + set_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + + if (__ipipe_pipeline_head_p(ipd)) + local_irq_disable_hw(); +#endif /* CONFIG_SMP */ +} + +unsigned long fastcall ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd) +{ + ipipe_declare_cpuid; + unsigned long s; +#ifdef CONFIG_SMP + unsigned long flags; + + ipipe_lock_cpu(flags); /* Care for migration. 
*/ + + s = __test_and_set_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + + if (!__ipipe_pipeline_head_p(ipd)) + ipipe_unlock_cpu(flags); +#else /* CONFIG_SMP */ + s = test_and_set_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + + if (__ipipe_pipeline_head_p(ipd)) + local_irq_disable_hw(); +#endif /* CONFIG_SMP */ + + return s; +} + +void fastcall __ipipe_spin_lock_irq(raw_spinlock_t *lock) +{ + struct ipipe_domain *ipd; + ipipe_declare_cpuid; + + local_irq_disable_hw(); + __raw_spin_lock(lock); + ipipe_load_cpuid(); + ipd = per_cpu(ipipe_percpu_domain, cpuid); + __set_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); +} + +void fastcall __ipipe_spin_unlock_irq(raw_spinlock_t *lock) +{ + struct ipipe_domain *ipd; + ipipe_declare_cpuid; + + __raw_spin_unlock(lock); + ipipe_load_cpuid(); + ipd = per_cpu(ipipe_percpu_domain, cpuid); + __clear_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + local_irq_enable_hw(); +} + +unsigned long fastcall __ipipe_spin_lock_irqsave(raw_spinlock_t *lock) +{ + struct ipipe_domain *ipd; + ipipe_declare_cpuid; + unsigned long flags; + int s; + + local_irq_save_hw(flags); + __raw_spin_lock(lock); + ipipe_load_cpuid(); + ipd = per_cpu(ipipe_percpu_domain, cpuid); + s = __test_and_set_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + + return raw_mangle_irq_bits(s, flags); +} + +void fastcall __ipipe_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long x) +{ + struct ipipe_domain *ipd; + ipipe_declare_cpuid; + + __raw_spin_unlock(lock); + ipipe_load_cpuid(); + ipd = per_cpu(ipipe_percpu_domain, cpuid); + if (!raw_demangle_irq_bits(&x)) + __clear_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + local_irq_restore_hw(x); +} + +/* + * ipipe_unstall_pipeline_from() -- Unstall the pipeline and + * synchronize pending interrupts for a given domain. See + * __ipipe_walk_pipeline() for more information. 
+ */ +void fastcall ipipe_unstall_pipeline_from(struct ipipe_domain *ipd) +{ + struct list_head *pos; + unsigned long flags; + ipipe_declare_cpuid; + + ipipe_lock_cpu(flags); + + __clear_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + + if (ipd == per_cpu(ipipe_percpu_domain, cpuid)) + pos = &ipd->p_link; + else + pos = __ipipe_pipeline.next; + + __ipipe_walk_pipeline(pos, cpuid); + + if (__ipipe_pipeline_head_p(ipd)) + local_irq_enable_hw(); + else + ipipe_unlock_cpu(flags); +} + +unsigned long fastcall ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd) +{ + unsigned long flags, x; + ipipe_declare_cpuid; + + ipipe_get_cpu(flags); + x = test_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + ipipe_unstall_pipeline_from(ipd); + ipipe_put_cpu(flags); + + return x; +} + +void fastcall ipipe_restore_pipeline_from(struct ipipe_domain *ipd, + unsigned long x) +{ + if (x) + ipipe_stall_pipeline_from(ipd); + else + ipipe_unstall_pipeline_from(ipd); +} + +void ipipe_unstall_pipeline_head(void) +{ + struct ipipe_domain *head; + ipipe_declare_cpuid; + + local_irq_disable_hw(); + ipipe_load_cpuid(); + head = __ipipe_pipeline_head(); + __clear_bit(IPIPE_STALL_FLAG, &head->cpudata[cpuid].status); + + if (unlikely(head->cpudata[cpuid].irq_pending_hi != 0)) { + if (likely(head == per_cpu(ipipe_percpu_domain, cpuid))) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + else + __ipipe_walk_pipeline(&head->p_link, cpuid); + } + + local_irq_enable_hw(); +} + +void fastcall __ipipe_restore_pipeline_head(struct ipipe_domain *head, unsigned long x) +{ + ipipe_declare_cpuid; + + local_irq_disable_hw(); + ipipe_load_cpuid(); + + if (x) { +#ifdef CONFIG_DEBUG_KERNEL + static int warned; /* gates the warning only, never the stall */ + if (test_and_set_bit(IPIPE_STALL_FLAG, &head->cpudata[cpuid].status) && !warned) { + /* + * Already stalled albeit ipipe_restore_pipeline_head() + * should have detected it?
Send a warning once. + */ + warned = 1; + printk(KERN_WARNING + "I-pipe: ipipe_restore_pipeline_head() optimization failed.\n"); + dump_stack(); + } +#else /* !CONFIG_DEBUG_KERNEL */ + set_bit(IPIPE_STALL_FLAG, &head->cpudata[cpuid].status); +#endif /* CONFIG_DEBUG_KERNEL */ + } + else { + __clear_bit(IPIPE_STALL_FLAG, &head->cpudata[cpuid].status); + if (unlikely(head->cpudata[cpuid].irq_pending_hi != 0)) { + if (likely(head == per_cpu(ipipe_percpu_domain, cpuid))) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + else + __ipipe_walk_pipeline(&head->p_link, cpuid); + } + local_irq_enable_hw(); + } +} + +void fastcall __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock) +{ + __raw_spin_unlock(&lock->__raw_lock); +} + +void fastcall __ipipe_spin_unlock_irqcomplete(unsigned long x) +{ + struct ipipe_domain *ipd; + ipipe_declare_cpuid; + + ipipe_load_cpuid(); + ipd = per_cpu(ipipe_percpu_domain, cpuid); + if (!raw_demangle_irq_bits(&x)) + __clear_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + local_irq_restore_hw(x); +} + +/* __ipipe_walk_pipeline(): Plays interrupts pending in the log. Must + be called with local hw interrupts disabled. */ + +void fastcall __ipipe_walk_pipeline(struct list_head *pos, int cpuid) +{ + struct ipipe_domain *this_domain = per_cpu(ipipe_percpu_domain, cpuid); + + while (pos != &__ipipe_pipeline) { + struct ipipe_domain *next_domain = + list_entry(pos, struct ipipe_domain, p_link); + + if (test_bit + (IPIPE_STALL_FLAG, &next_domain->cpudata[cpuid].status)) + break; /* Stalled stage -- do not go further. */ + + if (next_domain->cpudata[cpuid].irq_pending_hi != 0) { + + if (next_domain == this_domain) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + else { + __ipipe_switch_to(this_domain, next_domain, + cpuid); + + ipipe_load_cpuid(); /* Processor might have changed. */ + + if (this_domain->cpudata[cpuid].
+ irq_pending_hi != 0 + && !test_bit(IPIPE_STALL_FLAG, + &this_domain->cpudata[cpuid].status)) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + } + + break; + } else if (next_domain == this_domain) + break; + + pos = next_domain->p_link.next; + } +} + +/* + * ipipe_suspend_domain() -- Suspend the current domain, switching to + * the next one which has pending work down the pipeline. + */ +void ipipe_suspend_domain(void) +{ + struct ipipe_domain *this_domain, *next_domain; + struct list_head *ln; + unsigned long flags; + ipipe_declare_cpuid; + + ipipe_lock_cpu(flags); + + this_domain = next_domain = per_cpu(ipipe_percpu_domain, cpuid); + + __clear_bit(IPIPE_STALL_FLAG, &this_domain->cpudata[cpuid].status); + + if (this_domain->cpudata[cpuid].irq_pending_hi != 0) + goto sync_stage; + + for (;;) { + ln = next_domain->p_link.next; + + if (ln == &__ipipe_pipeline) + break; + + next_domain = list_entry(ln, struct ipipe_domain, p_link); + + if (test_bit(IPIPE_STALL_FLAG, + &next_domain->cpudata[cpuid].status)) + break; + + if (next_domain->cpudata[cpuid].irq_pending_hi == 0) + continue; + + per_cpu(ipipe_percpu_domain, cpuid) = next_domain; + +sync_stage: + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + + ipipe_load_cpuid(); /* Processor might have changed. */ + + if (per_cpu(ipipe_percpu_domain, cpuid) != next_domain) + /* + * Something has changed the current domain under our + * feet, recycling the register set; take note. + */ + this_domain = per_cpu(ipipe_percpu_domain, cpuid); + } + + per_cpu(ipipe_percpu_domain, cpuid) = this_domain; + + ipipe_unlock_cpu(flags); +} + +/* ipipe_alloc_virq() -- Allocate a pipelined virtual/soft interrupt. + * Virtual interrupts are handled in exactly the same way than their + * hw-generated counterparts wrt pipelining. 
+ */ +unsigned ipipe_alloc_virq(void) +{ + unsigned long flags, irq = 0; + int ipos; + + spin_lock_irqsave(&__ipipe_pipelock, flags); + + if (__ipipe_virtual_irq_map != ~0) { + ipos = ffz(__ipipe_virtual_irq_map); + set_bit(ipos, &__ipipe_virtual_irq_map); + irq = ipos + IPIPE_VIRQ_BASE; + } + + spin_unlock_irqrestore(&__ipipe_pipelock, flags); + + return irq; +} + +/* ipipe_virtualize_irq() -- Attach a handler (and optionally a hw + acknowledge routine) to an interrupt for a given domain. */ + +int ipipe_virtualize_irq(struct ipipe_domain *ipd, + unsigned irq, + ipipe_irq_handler_t handler, + void *cookie, + ipipe_irq_ackfn_t acknowledge, + unsigned modemask) +{ + unsigned long flags; + int err; + + if (irq >= IPIPE_NR_IRQS) + return -EINVAL; + + if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK) + return -EPERM; + + if (!test_bit(IPIPE_AHEAD_FLAG, &ipd->flags)) + /* Silently unwire interrupts for non-heading domains. */ + modemask &= ~IPIPE_WIRED_MASK; + + spin_lock_irqsave(&__ipipe_pipelock, flags); + + if (handler != NULL) { + + if (handler == IPIPE_SAME_HANDLER) { + handler = ipd->irqs[irq].handler; + cookie = ipd->irqs[irq].cookie; + + if (handler == NULL) { + err = -EINVAL; + goto unlock_and_exit; + } + } else if ((modemask & IPIPE_EXCLUSIVE_MASK) != 0 && + ipd->irqs[irq].handler != NULL) { + err = -EBUSY; + goto unlock_and_exit; + } + + /* Wired interrupts can only be delivered to domains + * always heading the pipeline, and using dynamic + * propagation. 
*/ + + if ((modemask & IPIPE_WIRED_MASK) != 0) { + if ((modemask & (IPIPE_PASS_MASK | IPIPE_STICKY_MASK)) != 0) { + err = -EINVAL; + goto unlock_and_exit; + } + modemask |= (IPIPE_HANDLE_MASK); + } + + if ((modemask & IPIPE_STICKY_MASK) != 0) + modemask |= IPIPE_HANDLE_MASK; + } else + modemask &= + ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK | + IPIPE_EXCLUSIVE_MASK | IPIPE_WIRED_MASK); + + if (acknowledge == NULL && !ipipe_virtual_irq_p(irq)) + /* Acknowledge handler unspecified for a hw interrupt: + use the Linux-defined handler instead. */ + acknowledge = ipipe_root_domain->irqs[irq].acknowledge; + + ipd->irqs[irq].handler = handler; + ipd->irqs[irq].cookie = cookie; + ipd->irqs[irq].acknowledge = acknowledge; + ipd->irqs[irq].control = modemask; + + if (irq < NR_IRQS && handler != NULL && !ipipe_virtual_irq_p(irq)) { + __ipipe_enable_irqdesc(irq); + + if ((modemask & IPIPE_ENABLE_MASK) != 0) { + if (ipd != ipipe_current_domain) { + /* IRQ enable/disable state is domain-sensitive, so we may + not change it for another domain. What is allowed + however is forcing some domain to handle an interrupt + source, by passing the proper 'ipd' descriptor which + thus may be different from ipipe_current_domain. */ + err = -EPERM; + goto unlock_and_exit; + } + + __ipipe_enable_irq(irq); + } + } + + err = 0; + + unlock_and_exit: + + spin_unlock_irqrestore(&__ipipe_pipelock, flags); + + return err; +} + +/* ipipe_control_irq() -- Change modes of a pipelined interrupt for + * the current domain. 
*/ + +int ipipe_control_irq(unsigned irq, unsigned clrmask, unsigned setmask) +{ + struct ipipe_domain *ipd; + unsigned long flags; + + if (irq >= IPIPE_NR_IRQS) + return -EINVAL; + + ipd = ipipe_current_domain; + + if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK) + return -EPERM; + + if (ipd->irqs[irq].handler == NULL) + setmask &= ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK); + + if ((setmask & IPIPE_STICKY_MASK) != 0) + setmask |= IPIPE_HANDLE_MASK; + + if ((clrmask & (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK)) != 0) /* If one goes, both go. */ + clrmask |= (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK); + + spin_lock_irqsave(&__ipipe_pipelock, flags); + + ipd->irqs[irq].control &= ~clrmask; + ipd->irqs[irq].control |= setmask; + + if ((setmask & IPIPE_ENABLE_MASK) != 0) + __ipipe_enable_irq(irq); + else if ((clrmask & IPIPE_ENABLE_MASK) != 0) + __ipipe_disable_irq(irq); + + spin_unlock_irqrestore(&__ipipe_pipelock, flags); + + return 0; +} + +/* __ipipe_dispatch_event() -- Low-level event dispatcher. */ + +int fastcall __ipipe_dispatch_event (unsigned event, void *data) +{ + struct ipipe_domain *start_domain, *this_domain, *next_domain; + ipipe_event_handler_t evhand; + struct list_head *pos, *npos; + unsigned long flags; + ipipe_declare_cpuid; + int propagate = 1; + + ipipe_lock_cpu(flags); + + start_domain = this_domain = per_cpu(ipipe_percpu_domain, cpuid); + + list_for_each_safe(pos,npos,&__ipipe_pipeline) { + + /* + * Note: Domain migration may occur while running + * event or interrupt handlers, in which case the + * current register set is going to be recycled for a + * different domain than the initiating one. We do + * care for that, always tracking the current domain + * descriptor upon return from those handlers. + */ + next_domain = list_entry(pos,struct ipipe_domain,p_link); + + /* + * Keep a cached copy of the handler's address since + * ipipe_catch_event() may clear it under our feet. 
+ */ + + evhand = next_domain->evhand[event]; + + if (evhand != NULL) { + per_cpu(ipipe_percpu_domain, cpuid) = next_domain; + next_domain->cpudata[cpuid].evsync |= (1LL << event); + ipipe_unlock_cpu(flags); + propagate = !evhand(event,start_domain,data); + ipipe_lock_cpu(flags); + next_domain->cpudata[cpuid].evsync &= ~(1LL << event); + if (per_cpu(ipipe_percpu_domain, cpuid) != next_domain) + this_domain = per_cpu(ipipe_percpu_domain, cpuid); + } + + if (next_domain != ipipe_root_domain && /* NEVER sync the root stage here. */ + next_domain->cpudata[cpuid].irq_pending_hi != 0 && + !test_bit(IPIPE_STALL_FLAG,&next_domain->cpudata[cpuid].status)) { + per_cpu(ipipe_percpu_domain, cpuid) = next_domain; + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + ipipe_load_cpuid(); + if (per_cpu(ipipe_percpu_domain, cpuid) != next_domain) + this_domain = per_cpu(ipipe_percpu_domain, cpuid); + } + + per_cpu(ipipe_percpu_domain, cpuid) = this_domain; + + if (next_domain == this_domain || !propagate) + break; + } + + ipipe_unlock_cpu(flags); + + return !propagate; +} + +/* + * __ipipe_dispatch_wired -- Wired interrupt dispatcher. Wired + * interrupts are immediately and unconditionally delivered to the + * domain heading the pipeline upon receipt, and such domain must have + * been registered as an invariant head for the system (priority == + * IPIPE_HEAD_PRIORITY). The motivation for using wired interrupts is + * to get an extra-fast dispatching path for those IRQs, by relying on + * a straightforward logic based on assumptions that must always be + * true for invariant head domains. The following assumptions are + * made when dealing with such interrupts: + * + * 1- Wired interrupts are purely dynamic, i.e. the decision to + * propagate them down the pipeline must be done from the head domain + * ISR. + * 2- Wired interrupts cannot be shared or sticky. 
+ * 3- The root domain cannot be an invariant pipeline head, in + * consequence of what the root domain cannot handle wired + * interrupts. + * 4- Wired interrupts must have a valid acknowledge handler for the + * head domain (if needed), and in any case, must not rely on handlers + * provided by lower priority domains during the acknowledge cycle + * (see __ipipe_handle_irq). + * + * Called with hw interrupts off. + */ + +int fastcall __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq) +{ + struct ipcpudata *cpudata; + struct ipipe_domain *old; + ipipe_declare_cpuid; + + ipipe_load_cpuid(); + cpudata = &head->cpudata[cpuid]; + cpudata->irq_counters[irq].total_hits++; + + if (test_bit(IPIPE_LOCK_FLAG, &head->irqs[irq].control)) { + /* If we can't process this IRQ right now, we must + * mark it as pending, so that it will get played + * during normal log sync when the corresponding + * interrupt source is eventually unlocked. */ + cpudata->irq_counters[irq].pending_hits++; + return 0; + } + + if (test_bit(IPIPE_STALL_FLAG, &cpudata->status)) { + cpudata->irq_counters[irq].pending_hits++; + __ipipe_set_irq_bit(head, cpuid, irq); + return 0; + } + + old = per_cpu(ipipe_percpu_domain, cpuid); + per_cpu(ipipe_percpu_domain, cpuid) = head; /* Switch to the head domain. */ + + __set_bit(IPIPE_STALL_FLAG, &cpudata->status); + head->irqs[irq].handler(irq,head->irqs[irq].cookie); /* Call the ISR. */ + __ipipe_run_irqtail(); + __clear_bit(IPIPE_STALL_FLAG, &cpudata->status); + + /* We expect the caller to start a complete pipeline walk upon + * return, so that propagated interrupts will get played. */ + + if (per_cpu(ipipe_percpu_domain, cpuid) == head) + per_cpu(ipipe_percpu_domain, cpuid) = old; /* Back to the preempted domain. */ + + return 1; +} + +/* + * __ipipe_sync_stage() -- Flush the pending IRQs for the current + * domain (and processor). This routine flushes the interrupt log + * (see "Optimistic interrupt protection" from D. Stodolsky et al. 
for + * more on the deferred interrupt scheme). Every interrupt that + * occurred while the pipeline was stalled gets played. WARNING: + * callers on SMP boxen should always check for CPU migration on + * return of this routine. One can control the kind of interrupts + * which are going to be sync'ed using the syncmask + * parameter. IPIPE_IRQMASK_ANY plays them all, IPIPE_IRQMASK_VIRT + * plays virtual interrupts only. + * + * This routine must be called with hw interrupts off. + */ +void fastcall __ipipe_sync_stage(unsigned long syncmask) +{ + unsigned long mask, submask; + struct ipcpudata *cpudata; + struct ipipe_domain *ipd; + ipipe_declare_cpuid; + int level, rank; + unsigned irq; + + ipipe_load_cpuid(); + ipd = per_cpu(ipipe_percpu_domain, cpuid); + cpudata = &ipd->cpudata[cpuid]; + + if (__test_and_set_bit(IPIPE_SYNC_FLAG, &cpudata->status)) + return; + + /* + * The policy here is to keep the dispatching code interrupt-free + * by stalling the current stage. If the upper domain handler + * (which we call) wants to re-enable interrupts while in a safe + * portion of the code (e.g. SA_INTERRUPT flag unset for Linux's + * sigaction()), it will have to unstall (then stall again before + * returning to us!) the stage when it sees fit. 
+ */ + while ((mask = (cpudata->irq_pending_hi & syncmask)) != 0) { + level = __ipipe_ffnz(mask); + + while ((submask = cpudata->irq_pending_lo[level]) != 0) { + rank = __ipipe_ffnz(submask); + irq = (level << IPIPE_IRQ_ISHIFT) + rank; + + if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) { + __clear_bit(rank, &cpudata->irq_pending_lo[level]); + continue; + } + + if (--cpudata->irq_counters[irq].pending_hits == 0) { + __clear_bit(rank, &cpudata->irq_pending_lo[level]); + if (cpudata->irq_pending_lo[level] == 0) + __clear_bit(level, &cpudata->irq_pending_hi); + } + + __set_bit(IPIPE_STALL_FLAG, &cpudata->status); + + if (ipd == ipipe_root_domain) + trace_hardirqs_off(); + + __ipipe_run_isr(ipd, irq, cpuid); +#ifdef CONFIG_SMP + { + int _cpuid = ipipe_processor_id(); + + if (_cpuid != cpuid) { /* Handle CPU migration. */ + /* + * We expect any domain to clear the SYNC bit each + * time it switches in a new task, so that preemptions + * and/or CPU migrations (in the SMP case) over the + * ISR do not lock out the log syncer for some + * indefinite amount of time. In the Linux case, + * schedule() handles this (see kernel/sched.c). For + * this reason, we don't bother clearing it here for + * the source CPU in the migration handling case, + * since it must have scheduled another task in by + * now. + */ + cpuid = _cpuid; + cpudata = &ipd->cpudata[cpuid]; + __set_bit(IPIPE_SYNC_FLAG, &cpudata->status); + } + } +#endif /* CONFIG_SMP */ + if (ipd == ipipe_root_domain && + test_bit(IPIPE_STALL_FLAG, &cpudata->status)) + trace_hardirqs_on(); + + __clear_bit(IPIPE_STALL_FLAG, &cpudata->status); + } + } + + __clear_bit(IPIPE_SYNC_FLAG, &cpudata->status); +} + +/* ipipe_register_domain() -- Link a new domain to the pipeline. 
*/ + +int ipipe_register_domain(struct ipipe_domain *ipd, + struct ipipe_domain_attr *attr) +{ + struct list_head *pos; + unsigned long flags; + + if (ipipe_current_domain != ipipe_root_domain) { + printk(KERN_WARNING + "I-pipe: Only the root domain may register a new domain.\n"); + return -EPERM; + } + + if (attr->priority == IPIPE_HEAD_PRIORITY && + test_bit(IPIPE_AHEAD_FLAG,&__ipipe_pipeline_head()->flags)) + return -EAGAIN; /* Cannot override current head. */ + + flags = ipipe_critical_enter(NULL); + + list_for_each(pos, &__ipipe_pipeline) { + struct ipipe_domain *_ipd = + list_entry(pos, struct ipipe_domain, p_link); + if (_ipd->domid == attr->domid) + break; + } + + ipipe_critical_exit(flags); + + if (pos != &__ipipe_pipeline) + /* A domain with the given id already exists -- fail. */ + return -EBUSY; + + ipd->name = attr->name; + ipd->domid = attr->domid; + ipd->pdd = attr->pdd; + ipd->flags = 0; + + if (attr->priority == IPIPE_HEAD_PRIORITY) { + ipd->priority = INT_MAX; + __set_bit(IPIPE_AHEAD_FLAG,&ipd->flags); + } + else + ipd->priority = attr->priority; + + __ipipe_init_stage(ipd); + + INIT_LIST_HEAD(&ipd->p_link); + +#ifdef CONFIG_PROC_FS + __ipipe_add_domain_proc(ipd); +#endif /* CONFIG_PROC_FS */ + + flags = ipipe_critical_enter(NULL); + + list_for_each(pos, &__ipipe_pipeline) { + struct ipipe_domain *_ipd = + list_entry(pos, struct ipipe_domain, p_link); + if (ipd->priority > _ipd->priority) + break; + } + + list_add_tail(&ipd->p_link, pos); + + ipipe_critical_exit(flags); + + printk(KERN_WARNING "I-pipe: Domain %s registered.\n", ipd->name); + + /* + * Finally, allow the new domain to perform its initialization + * chores. + */ + + if (attr->entry != NULL) { + ipipe_declare_cpuid; + + ipipe_lock_cpu(flags); + + per_cpu(ipipe_percpu_domain, cpuid) = ipd; + attr->entry(); + per_cpu(ipipe_percpu_domain, cpuid) = ipipe_root_domain; + + ipipe_load_cpuid(); /* Processor might have changed. 
*/ + + if (ipipe_root_domain->cpudata[cpuid].irq_pending_hi != 0 && + !test_bit(IPIPE_STALL_FLAG, + &ipipe_root_domain->cpudata[cpuid].status)) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + + ipipe_unlock_cpu(flags); + } + + return 0; +} + +/* ipipe_unregister_domain() -- Remove a domain from the pipeline. Must be called from the root domain and may not target the root domain itself; both cases fail with -EPERM. */ + +int ipipe_unregister_domain(struct ipipe_domain *ipd) +{ + unsigned long flags; + + if (ipipe_current_domain != ipipe_root_domain) { + printk(KERN_WARNING + "I-pipe: Only the root domain may unregister a domain.\n"); + return -EPERM; + } + + if (ipd == ipipe_root_domain) { + printk(KERN_WARNING + "I-pipe: Cannot unregister the root domain.\n"); + return -EPERM; + } +#ifdef CONFIG_SMP + { + int _cpuid; + unsigned irq; + + /* + * In the SMP case, wait for the logged events to drain on + * other processors before eventually removing the domain + * from the pipeline. + */ + + ipipe_unstall_pipeline_from(ipd); + + flags = ipipe_critical_enter(NULL); + + for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { + clear_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control); + clear_bit(IPIPE_STICKY_FLAG, &ipd->irqs[irq].control); + set_bit(IPIPE_PASS_FLAG, &ipd->irqs[irq].control); + } + + ipipe_critical_exit(flags); + + for_each_online_cpu(_cpuid) /* walk the online mask; online CPU ids need not be 0..n-1 */ + for (irq = 0; irq < IPIPE_NR_IRQS; irq++) + while (ipd->cpudata[_cpuid].irq_counters[irq].pending_hits > 0) + cpu_relax(); + } +#endif /* CONFIG_SMP */ + + mutex_lock(&ipd->mutex); + +#ifdef CONFIG_PROC_FS + __ipipe_remove_domain_proc(ipd); +#endif /* CONFIG_PROC_FS */ + + /* + * Simply remove the domain from the pipeline and we are almost done. 
+ */ + + flags = ipipe_critical_enter(NULL); + list_del_init(&ipd->p_link); + ipipe_critical_exit(flags); + + __ipipe_cleanup_domain(ipd); + + mutex_unlock(&ipd->mutex); + + printk(KERN_WARNING "I-pipe: Domain %s unregistered.\n", ipd->name); + + return 0; +} + +/* + * ipipe_propagate_irq() -- Force a given IRQ propagation on behalf of + * a running interrupt handler to the next domain down the pipeline. + * ipipe_schedule_irq() -- Does almost the same as above, but attempts + * to pend the interrupt for the current domain first. + */ +int fastcall __ipipe_schedule_irq(unsigned irq, struct list_head *head) +{ + struct list_head *ln; + unsigned long flags; + ipipe_declare_cpuid; + + if (irq >= IPIPE_NR_IRQS || + (ipipe_virtual_irq_p(irq) + && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))) + return -EINVAL; + + ipipe_lock_cpu(flags); + + ln = head; + + while (ln != &__ipipe_pipeline) { + struct ipipe_domain *ipd = + list_entry(ln, struct ipipe_domain, p_link); + + if (test_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control)) { + ipd->cpudata[cpuid].irq_counters[irq].total_hits++; + ipd->cpudata[cpuid].irq_counters[irq].pending_hits++; + __ipipe_set_irq_bit(ipd, cpuid, irq); + ipipe_unlock_cpu(flags); + return 1; + } + + ln = ipd->p_link.next; + } + + ipipe_unlock_cpu(flags); + + return 0; +} + +/* ipipe_free_virq() -- Release a virtual/soft interrupt. */ + +int ipipe_free_virq(unsigned virq) +{ + if (!ipipe_virtual_irq_p(virq)) + return -EINVAL; + + clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map); + + return 0; +} + +void ipipe_init_attr(struct ipipe_domain_attr *attr) +{ + attr->name = "anon"; + attr->domid = 1; + attr->entry = NULL; + attr->priority = IPIPE_ROOT_PRIO; + attr->pdd = NULL; +} + +/* + * ipipe_catch_event() -- Interpose or remove an event handler for a + * given domain. 
+ */ +ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd, + unsigned event, + ipipe_event_handler_t handler) +{ + ipipe_event_handler_t old_handler; + unsigned long flags; + int self = 0, cpuid; + + if (event & IPIPE_EVENT_SELF) { + event &= ~IPIPE_EVENT_SELF; + self = 1; + } + + if (event >= IPIPE_NR_EVENTS) + return NULL; + + flags = ipipe_critical_enter(NULL); + + if (!(old_handler = xchg(&ipd->evhand[event],handler))) { + if (handler) { + if (self) + ipd->evself |= (1LL << event); + else + __ipipe_event_monitors[event]++; + } + } + else if (!handler) { + if (ipd->evself & (1LL << event)) + ipd->evself &= ~(1LL << event); + else + __ipipe_event_monitors[event]--; + } else if ((ipd->evself & (1LL << event)) && !self) { + __ipipe_event_monitors[event]++; + ipd->evself &= ~(1LL << event); + } else if (!(ipd->evself & (1LL << event)) && self) { + __ipipe_event_monitors[event]--; + ipd->evself |= (1LL << event); + } + + ipipe_critical_exit(flags); + + if (!handler && ipipe_root_domain_p) { + /* + * If we cleared a handler on behalf of the root + * domain, we have to wait for any current invocation + * to drain, since our caller might subsequently unmap + * the target domain. To this aim, this code + * synchronizes with __ipipe_dispatch_event(), + * guaranteeing that either the dispatcher sees a null + * handler in which case it discards the invocation + * (which also prevents from entering a livelock), or + * finds a valid handler and calls it. Symmetrically, + * ipipe_catch_event() ensures that the called code + * won't be unmapped under our feet until the event + * synchronization flag is cleared for the given event + * on all CPUs. 
+ */ + + for_each_online_cpu(cpuid) { + while (ipd->cpudata[cpuid].evsync & (1LL << event)) + schedule_timeout_interruptible(HZ / 50); + } + } + + return old_handler; +} + +cpumask_t ipipe_set_irq_affinity (unsigned irq, cpumask_t cpumask) +{ +#ifdef CONFIG_SMP + if (irq >= IPIPE_NR_XIRQS) + /* Allow changing affinity of external IRQs only. */ + return CPU_MASK_NONE; + + if (num_online_cpus() > 1) + return __ipipe_set_irq_affinity(irq,cpumask); +#endif /* CONFIG_SMP */ + + return CPU_MASK_NONE; +} + +int fastcall ipipe_send_ipi (unsigned ipi, cpumask_t cpumask) + +{ +#ifdef CONFIG_SMP + return __ipipe_send_ipi(ipi,cpumask); +#else /* !CONFIG_SMP */ + return -EINVAL; +#endif /* CONFIG_SMP */ +} + +int ipipe_alloc_ptdkey (void) +{ + unsigned long flags; + int key = -1; + + spin_lock_irqsave(&__ipipe_pipelock,flags); + + if (__ipipe_ptd_key_count < IPIPE_ROOT_NPTDKEYS) { + key = ffz(__ipipe_ptd_key_map); + set_bit(key,&__ipipe_ptd_key_map); + __ipipe_ptd_key_count++; + } + + spin_unlock_irqrestore(&__ipipe_pipelock,flags); + + return key; +} + +int ipipe_free_ptdkey (int key) +{ + unsigned long flags; + + if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) + return -EINVAL; + + spin_lock_irqsave(&__ipipe_pipelock,flags); + + if (test_and_clear_bit(key,&__ipipe_ptd_key_map)) + __ipipe_ptd_key_count--; + + spin_unlock_irqrestore(&__ipipe_pipelock,flags); + + return 0; +} + +int fastcall ipipe_set_ptd (int key, void *value) + +{ + if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) + return -EINVAL; + + current->ptd[key] = value; + + return 0; +} + +void fastcall *ipipe_get_ptd (int key) + +{ + if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) + return NULL; + + return current->ptd[key]; +} + +#ifdef CONFIG_PROC_FS + +struct proc_dir_entry *ipipe_proc_root; +/* __ipipe_version_info_proc() -- read_proc handler: prints IPIPE_VERSION_STRING plus a newline into page, then hands back the part of it starting at off, at most count bytes, setting *eof once the remainder fits into count. */ +static int __ipipe_version_info_proc(char *page, + char **start, + off_t off, int count, int *eof, void *data) +{ + int len = sprintf(page, "%s\n", IPIPE_VERSION_STRING); + + len -= off; /* from here on, len is the number of bytes left past off */ + + if (len <= count) /* off was already subtracted above; comparing against off + count flagged EOF too early */ + *eof = 1; + + *start = 
page + off; + + if(len > count) + len = count; + + if(len < 0) + len = 0; + + return len; +} + +static int __ipipe_common_info_show(struct seq_file *p, void *data) +{ + struct ipipe_domain *ipd = (struct ipipe_domain *)p->private; + char handling, stickiness, lockbit, exclusive, virtuality; + + unsigned long ctlbits; + unsigned irq; + + seq_printf(p, " +----- Handling ([A]ccepted, [G]rabbed, [W]ired, [D]iscarded)\n"); + seq_printf(p, " |+---- Sticky\n"); + seq_printf(p, " ||+--- Locked\n"); + seq_printf(p, " |||+-- Exclusive\n"); + seq_printf(p, " ||||+- Virtual\n"); + seq_printf(p, "[IRQ] |||||\n"); + + mutex_lock(&ipd->mutex); + + for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { + /* Remember to protect against + * ipipe_virtual_irq/ipipe_control_irq if more fields + * get involved. */ + ctlbits = ipd->irqs[irq].control; + + if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq)) + /* + * There might be a hole between the last external + * IRQ and the first virtual one; skip it. + */ + continue; + + if (ipipe_virtual_irq_p(irq) + && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)) + /* Non-allocated virtual IRQ; skip it. */ + continue; + + /* + * Statuses are as follows: + * o "accepted" means handled _and_ passed down the pipeline. + * o "grabbed" means handled, but the interrupt might be + * terminated _or_ passed down the pipeline depending on + * what the domain handler asks for to the I-pipe. + * o "wired" is basically the same as "grabbed", except that + * the interrupt is unconditionally delivered to an invariant + * pipeline head domain. + * o "passed" means unhandled by the domain but passed + * down the pipeline. + * o "discarded" means unhandled and _not_ passed down the + * pipeline. The interrupt merely disappears from the + * current domain down to the end of the pipeline. 
+ */ + if (ctlbits & IPIPE_HANDLE_MASK) { + if (ctlbits & IPIPE_PASS_MASK) + handling = 'A'; + else if (ctlbits & IPIPE_WIRED_MASK) + handling = 'W'; + else + handling = 'G'; + } else if (ctlbits & IPIPE_PASS_MASK) + /* Do not output if no major action is taken. */ + continue; + else + handling = 'D'; + + if (ctlbits & IPIPE_STICKY_MASK) + stickiness = 'S'; + else + stickiness = '.'; + + if (ctlbits & IPIPE_LOCK_MASK) + lockbit = 'L'; + else + lockbit = '.'; + + if (ctlbits & IPIPE_EXCLUSIVE_MASK) + exclusive = 'X'; + else + exclusive = '.'; + + if (ipipe_virtual_irq_p(irq)) + virtuality = 'V'; + else + virtuality = '.'; + + seq_printf(p, " %3u: %c%c%c%c%c\n", + irq, handling, stickiness, lockbit, exclusive, virtuality); + } + + seq_printf(p, "[Domain info]\n"); + + seq_printf(p, "id=0x%.8x\n", ipd->domid); + + if (test_bit(IPIPE_AHEAD_FLAG,&ipd->flags)) + seq_printf(p, "priority=topmost\n"); + else + seq_printf(p, "priority=%d\n", ipd->priority); + + mutex_unlock(&ipd->mutex); + + return 0; +} + +static int __ipipe_common_info_open(struct inode *inode, struct file *file) +{ + return single_open(file, __ipipe_common_info_show, PROC_I(inode)->pde->data); +} + +static struct file_operations __ipipe_info_proc_ops = { + .owner = THIS_MODULE, + .open = __ipipe_common_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void __ipipe_add_domain_proc(struct ipipe_domain *ipd) +{ + struct proc_dir_entry *e = create_proc_entry(ipd->name, 0444, ipipe_proc_root); + if (e) { + e->proc_fops = &__ipipe_info_proc_ops; + e->data = (void*) ipd; + } +} + +void __ipipe_remove_domain_proc(struct ipipe_domain *ipd) +{ + remove_proc_entry(ipd->name,ipipe_proc_root); +} + +void __init ipipe_init_proc(void) +{ + ipipe_proc_root = create_proc_entry("ipipe",S_IFDIR, 0); + create_proc_read_entry("version",0444,ipipe_proc_root,&__ipipe_version_info_proc,NULL); + __ipipe_add_domain_proc(ipipe_root_domain); + + __ipipe_init_tracer(); +} + +#endif /* 
CONFIG_PROC_FS */ + +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT +void ipipe_check_context(struct ipipe_domain *border_ipd) +{ + static int check_hit; + + if (likely(ipipe_current_domain->priority <= border_ipd->priority) || + check_hit) + return; + + check_hit = 1; + + ipipe_trace_panic_freeze(); + ipipe_set_printk_sync(ipipe_current_domain); + printk(KERN_ERR "I-pipe: Detected illicit call from domain '%s'\n" + KERN_ERR " into a service reserved for domain '%s' and " + "below.\n", + ipipe_current_domain->name, border_ipd->name); + show_stack(NULL, NULL); + ipipe_trace_panic_dump(); +} + +EXPORT_SYMBOL(ipipe_check_context); +#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ + +EXPORT_SYMBOL(ipipe_virtualize_irq); +EXPORT_SYMBOL(ipipe_control_irq); +EXPORT_SYMBOL(ipipe_suspend_domain); +EXPORT_SYMBOL(ipipe_alloc_virq); +EXPORT_PER_CPU_SYMBOL(ipipe_percpu_domain); +EXPORT_SYMBOL(ipipe_root); +EXPORT_SYMBOL(ipipe_stall_pipeline_from); +EXPORT_SYMBOL(ipipe_test_and_stall_pipeline_from); +EXPORT_SYMBOL(ipipe_unstall_pipeline_from); +EXPORT_SYMBOL(ipipe_restore_pipeline_from); +EXPORT_SYMBOL(ipipe_test_and_unstall_pipeline_from); +EXPORT_SYMBOL(ipipe_unstall_pipeline_head); +EXPORT_SYMBOL(__ipipe_restore_pipeline_head); +EXPORT_SYMBOL(__ipipe_unstall_root); +EXPORT_SYMBOL(__ipipe_restore_root); +#ifdef CONFIG_SMP +EXPORT_SYMBOL(__ipipe_stall_root); +EXPORT_SYMBOL(__ipipe_test_root); +EXPORT_SYMBOL(__ipipe_test_and_stall_root); +#endif /* CONFIG_SMP */ +EXPORT_SYMBOL(__ipipe_spin_lock_irq); +EXPORT_SYMBOL(__ipipe_spin_unlock_irq); +EXPORT_SYMBOL(__ipipe_spin_lock_irqsave); +EXPORT_SYMBOL(__ipipe_spin_unlock_irqrestore); +EXPORT_SYMBOL(__ipipe_pipeline); +EXPORT_SYMBOL(ipipe_register_domain); +EXPORT_SYMBOL(ipipe_unregister_domain); +EXPORT_SYMBOL(ipipe_free_virq); +EXPORT_SYMBOL(ipipe_init_attr); +EXPORT_SYMBOL(ipipe_catch_event); +EXPORT_SYMBOL(ipipe_alloc_ptdkey); +EXPORT_SYMBOL(ipipe_free_ptdkey); +EXPORT_SYMBOL(ipipe_set_ptd); +EXPORT_SYMBOL(ipipe_get_ptd); 
+EXPORT_SYMBOL(ipipe_set_irq_affinity); +EXPORT_SYMBOL(ipipe_send_ipi); +EXPORT_SYMBOL(__ipipe_schedule_irq); diff -Naur linux-2.6.21.5.orig/kernel/ipipe/Kconfig linux-2.6.21.5/kernel/ipipe/Kconfig --- linux-2.6.21.5.orig/kernel/ipipe/Kconfig 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.21.5/kernel/ipipe/Kconfig 2007-08-07 10:56:22.000000000 +0200 @@ -0,0 +1,6 @@ +config IPIPE + bool "Interrupt pipeline" + default y + ---help--- + Activate this option if you want the interrupt pipeline to be + compiled in. diff -Naur linux-2.6.21.5.orig/kernel/ipipe/Kconfig.debug linux-2.6.21.5/kernel/ipipe/Kconfig.debug --- linux-2.6.21.5.orig/kernel/ipipe/Kconfig.debug 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.21.5/kernel/ipipe/Kconfig.debug 2007-08-07 10:56:22.000000000 +0200 @@ -0,0 +1,88 @@ +config IPIPE_DEBUG + bool "I-pipe debugging" + depends on IPIPE + +config IPIPE_DEBUG_CONTEXT + bool "Check for illicit cross-domain calls" + depends on IPIPE_DEBUG + default y + ---help--- + Enable this feature to arm checkpoints in the kernel that + verify the correct invocation context. On entry of critical + Linux services a warning is issued if the caller is not + running over the root domain. + +config IPIPE_TRACE + bool "Latency tracing" + depends on IPIPE_DEBUG + select FRAME_POINTER + select KALLSYMS + select PROC_FS + ---help--- + Activate this option if you want to use per-function tracing of + the kernel. The tracer will collect data via instrumentation + features like the one below or with the help of explicit calls + of ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the + in-kernel tracing API. The collected data and runtime control + are available via /proc/ipipe/trace/*. + +if IPIPE_TRACE + +config IPIPE_TRACE_ENABLE + bool "Enable tracing on boot" + default y + ---help--- + Disable this option if you want to arm the tracer after booting + manually ("echo 1 > /proc/ipipe/trace/enable"). 
This can reduce + boot time on slow embedded devices due to the tracer overhead. + +config IPIPE_TRACE_MCOUNT + bool "Instrument function entries" + default y + ---help--- + When enabled, records every kernel function entry in the tracer + log. While this slows down the system noticeably, it provides + the highest level of information about the flow of events. + However, it can be switched off in order to record only explicit + I-pipe trace points. + +config IPIPE_TRACE_IRQSOFF + bool "Trace IRQs-off times" + default y + ---help--- + Activate this option if I-pipe shall trace the longest path + with hard-IRQs switched off. + +config IPIPE_TRACE_SHIFT + int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)" + range 10 18 + default 14 + ---help--- + The number of trace points to hold tracing data for each + trace path, as a power of 2. + +config IPIPE_TRACE_VMALLOC + bool "Use vmalloc'ed trace buffer" + default y if EMBEDDED + ---help--- + Instead of reserving static kernel data, the required buffer + is allocated via vmalloc during boot-up when this option is + enabled. This can help to start systems that are low on memory, + but it slightly degrades overall performance. Try this option + when a traced kernel hangs unexpectedly at boot time. + +config IPIPE_TRACE_PANIC + bool "Enable panic back traces" + default y + ---help--- + Provides services to freeze and dump a back trace on panic + situations. This is used on IPIPE_DEBUG_CONTEXT exceptions + as well as ordinary kernel oopses. You can control the number + of printed back trace points via /proc/ipipe/trace. 
+ +config IPIPE_TRACE_ENABLE_VALUE + int + default 0 if !IPIPE_TRACE_ENABLE + default 1 if IPIPE_TRACE_ENABLE + +endif diff -Naur linux-2.6.21.5.orig/kernel/ipipe/Makefile linux-2.6.21.5/kernel/ipipe/Makefile --- linux-2.6.21.5.orig/kernel/ipipe/Makefile 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.21.5/kernel/ipipe/Makefile 2007-08-07 10:56:22.000000000 +0200 @@ -0,0 +1,3 @@ + +obj-$(CONFIG_IPIPE) += core.o +obj-$(CONFIG_IPIPE_TRACE) += tracer.o diff -Naur linux-2.6.21.5.orig/kernel/ipipe/tracer.c linux-2.6.21.5/kernel/ipipe/tracer.c --- linux-2.6.21.5.orig/kernel/ipipe/tracer.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.21.5/kernel/ipipe/tracer.c 2007-08-07 11:15:34.000000000 +0200 @@ -0,0 +1,1292 @@ +/* -*- linux-c -*- + * kernel/ipipe/tracer.c + * + * Copyright (C) 2005 Luotao Fu. + * 2005, 2006 Jan Kiszka. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define IPIPE_TRACE_PATHS 4 /* Do not lower below 3 */ +#define IPIPE_DEFAULT_ACTIVE 0 +#define IPIPE_DEFAULT_MAX 1 +#define IPIPE_DEFAULT_FROZEN 2 + +#define IPIPE_TRACE_POINTS (1 << CONFIG_IPIPE_TRACE_SHIFT) +#define WRAP_POINT_NO(point) ((point) & (IPIPE_TRACE_POINTS-1)) + +#define IPIPE_DEFAULT_PRE_TRACE 10 +#define IPIPE_DEFAULT_POST_TRACE 10 +#define IPIPE_DEFAULT_BACK_TRACE 30 + +#define IPIPE_DELAY_NOTE 1000 /* in nanoseconds */ +#define IPIPE_DELAY_WARN 10000 /* in nanoseconds */ + +#define IPIPE_TFLG_NMI_LOCK 0x0001 +#define IPIPE_TFLG_NMI_HIT 0x0002 +#define IPIPE_TFLG_NMI_FREEZE_REQ 0x0004 + +#define IPIPE_TFLG_HWIRQ_OFF 0x0100 +#define IPIPE_TFLG_FREEZING 0x0200 +#define IPIPE_TFLG_CURRDOM_SHIFT 10 /* bits 10..11: current domain */ +#define IPIPE_TFLG_CURRDOM_MASK 0x0C00 +#define IPIPE_TFLG_DOMSTATE_SHIFT 12 /* bits 12..15: domain stalled? */ +#define IPIPE_TFLG_DOMSTATE_BITS 3 + +#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \ + (point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT))) +#define IPIPE_TFLG_CURRENT_DOMAIN(point) \ + ((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT) + + +struct ipipe_trace_point{ + short type; + short flags; + unsigned long eip; + unsigned long parent_eip; + unsigned long v; + unsigned long long timestamp; +}; + +struct ipipe_trace_path{ + volatile int flags; + int dump_lock; /* separated from flags due to cross-cpu access */ + int trace_pos; /* next point to fill */ + int begin, end; /* finalised path begin and end */ + int post_trace; /* non-zero when in post-trace phase */ + unsigned long long length; /* max path length in cycles */ + unsigned long nmi_saved_eip; /* for deferred requests from NMIs */ + unsigned long nmi_saved_parent_eip; + unsigned long nmi_saved_v; + struct ipipe_trace_point point[IPIPE_TRACE_POINTS]; +} ____cacheline_aligned_in_smp; + +enum 
ipipe_trace_type +{ + IPIPE_TRACE_FUNC = 0, + IPIPE_TRACE_BEGIN, + IPIPE_TRACE_END, + IPIPE_TRACE_FREEZE, + IPIPE_TRACE_SPECIAL, + IPIPE_TRACE_PID, +}; + +#define IPIPE_TYPE_MASK 0x0007 +#define IPIPE_TYPE_BITS 3 + + +#ifdef CONFIG_IPIPE_TRACE_VMALLOC + +static struct ipipe_trace_path *trace_paths[NR_CPUS]; + +#else /* !CONFIG_IPIPE_TRACE_VMALLOC */ + +static struct ipipe_trace_path trace_paths[NR_CPUS][IPIPE_TRACE_PATHS] = + { [0 ... NR_CPUS-1] = + { [0 ... IPIPE_TRACE_PATHS-1] = + { .begin = -1, .end = -1 } + } + }; +#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ + +int ipipe_trace_enable = 0; + +static int active_path[NR_CPUS] = + { [0 ... NR_CPUS-1] = IPIPE_DEFAULT_ACTIVE }; +static int max_path[NR_CPUS] = + { [0 ... NR_CPUS-1] = IPIPE_DEFAULT_MAX }; +static int frozen_path[NR_CPUS] = + { [0 ... NR_CPUS-1] = IPIPE_DEFAULT_FROZEN }; +static IPIPE_DEFINE_SPINLOCK(global_path_lock); +static int pre_trace = IPIPE_DEFAULT_PRE_TRACE; +static int post_trace = IPIPE_DEFAULT_POST_TRACE; +static int back_trace = IPIPE_DEFAULT_BACK_TRACE; +static int verbose_trace; +static unsigned long trace_overhead; + +static DEFINE_MUTEX(out_mutex); +static struct ipipe_trace_path *print_path; +#ifdef CONFIG_IPIPE_TRACE_PANIC +static struct ipipe_trace_path *panic_path; +#endif /* CONFIG_IPIPE_TRACE_PANIC */ +static int print_pre_trace; +static int print_post_trace; + + +static long __ipipe_signed_tsc2us(long long tsc); +static void +__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point); +static void __ipipe_print_symname(struct seq_file *m, unsigned long eip); + + +static notrace void +__ipipe_store_domain_states(struct ipipe_trace_point *point, int cpu_id) +{ + struct list_head *pos; + int i = 0; + + list_for_each_prev(pos, &__ipipe_pipeline) { + struct ipipe_domain *ipd = + list_entry(pos, struct ipipe_domain, p_link); + + if (test_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpu_id].status)) + point->flags |= 1 << (i + IPIPE_TFLG_DOMSTATE_SHIFT); + + if (ipd == 
per_cpu(ipipe_percpu_domain, cpu_id)) + point->flags |= i << IPIPE_TFLG_CURRDOM_SHIFT; + + if (++i > IPIPE_TFLG_DOMSTATE_BITS) + break; + } +} + +static notrace int __ipipe_get_free_trace_path(int old, int cpu_id) +{ + int new_active = old; + struct ipipe_trace_path *tp; + + do { + if (++new_active == IPIPE_TRACE_PATHS) + new_active = 0; + tp = &trace_paths[cpu_id][new_active]; + } while ((new_active == max_path[cpu_id]) || + (new_active == frozen_path[cpu_id]) || + tp->dump_lock); + + return new_active; +} + +static notrace void +__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp, + struct ipipe_trace_path *old_tp, int old_pos) +{ + int i; + + new_tp->trace_pos = pre_trace+1; + + for (i = new_tp->trace_pos; i > 0; i--) + memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)], + &old_tp->point[WRAP_POINT_NO(old_pos-i)], + sizeof(struct ipipe_trace_point)); + + /* mark the end (i.e. the point before point[0]) invalid */ + new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0; +} + +static notrace struct ipipe_trace_path * +__ipipe_trace_end(int cpu_id, struct ipipe_trace_path *tp, int pos) +{ + struct ipipe_trace_path *old_tp = tp; + long active = active_path[cpu_id]; + unsigned long long length; + + /* do we have a new worst case? 
*/ + length = tp->point[tp->end].timestamp - + tp->point[tp->begin].timestamp; + if (length > (trace_paths[cpu_id][max_path[cpu_id]]).length) { + /* we need protection here against other cpus trying + to start a proc dump */ + spin_lock(&global_path_lock); + + /* active path holds new worst case */ + tp->length = length; + max_path[cpu_id] = active; + + /* find next unused trace path */ + active = __ipipe_get_free_trace_path(active, cpu_id); + + spin_unlock(&global_path_lock); + + tp = &trace_paths[cpu_id][active]; + + /* migrate last entries for pre-tracing */ + __ipipe_migrate_pre_trace(tp, old_tp, pos); + } + + return tp; +} + +static notrace struct ipipe_trace_path * +__ipipe_trace_freeze(int cpu_id, struct ipipe_trace_path *tp, int pos) +{ + struct ipipe_trace_path *old_tp = tp; + long active = active_path[cpu_id]; + int i; + + /* frozen paths have no core (begin=end) */ + tp->begin = tp->end; + + /* we need protection here against other cpus trying + * to set their frozen path or to start a proc dump */ + spin_lock(&global_path_lock); + + frozen_path[cpu_id] = active; + + /* find next unused trace path */ + active = __ipipe_get_free_trace_path(active, cpu_id); + + /* check if this is the first frozen path */ + for_each_online_cpu(i) { + if ((i != cpu_id) && + (trace_paths[i][frozen_path[i]].end >= 0)) + tp->end = -1; + } + + spin_unlock(&global_path_lock); + + tp = &trace_paths[cpu_id][active]; + + /* migrate last entries for pre-tracing */ + __ipipe_migrate_pre_trace(tp, old_tp, pos); + + return tp; +} + +void notrace +__ipipe_trace(enum ipipe_trace_type type, unsigned long eip, + unsigned long parent_eip, unsigned long v) +{ + struct ipipe_trace_path *tp, *old_tp; + int pos, next_pos, begin; + struct ipipe_trace_point *point; + unsigned long flags; + int cpu_id; + + local_irq_save_hw_notrace(flags); + + cpu_id = ipipe_processor_id(); + restart: + tp = old_tp = &trace_paths[cpu_id][active_path[cpu_id]]; + + /* here starts a race window with NMIs - catched 
below */ + + /* check for NMI recursion */ + if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) { + tp->flags |= IPIPE_TFLG_NMI_HIT; + + /* first freeze request from NMI context? */ + if ((type == IPIPE_TRACE_FREEZE) && + !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) { + /* save arguments and mark deferred freezing */ + tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ; + tp->nmi_saved_eip = eip; + tp->nmi_saved_parent_eip = parent_eip; + tp->nmi_saved_v = v; + } + return; /* no need for restoring flags inside IRQ */ + } + + /* clear NMI events and set lock (atomically per cpu) */ + tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | + IPIPE_TFLG_NMI_FREEZE_REQ)) + | IPIPE_TFLG_NMI_LOCK; + + /* check active_path again - some nasty NMI may have switched + * it meanwhile */ + if (unlikely(tp != &trace_paths[cpu_id][active_path[cpu_id]])) { + /* release lock on wrong path and restart */ + tp->flags &= ~IPIPE_TFLG_NMI_LOCK; + + /* there is no chance that the NMI got deferred + * => no need to check for pending freeze requests */ + goto restart; + } + + /* get the point buffer */ + pos = tp->trace_pos; + point = &tp->point[pos]; + + /* store all trace point data */ + point->type = type; + point->flags = raw_irqs_disabled_flags(flags) ? 
IPIPE_TFLG_HWIRQ_OFF : 0; + point->eip = eip; + point->parent_eip = parent_eip; + point->v = v; + ipipe_read_tsc(point->timestamp); + + __ipipe_store_domain_states(point, cpu_id); + + /* forward to next point buffer */ + next_pos = WRAP_POINT_NO(pos+1); + tp->trace_pos = next_pos; + + /* only mark beginning if we haven't started yet */ + begin = tp->begin; + if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0)) + tp->begin = pos; + + /* end of critical path, start post-trace if not already started */ + if (unlikely(type == IPIPE_TRACE_END) && + (begin >= 0) && !tp->post_trace) + tp->post_trace = post_trace + 1; + + /* freeze only if the slot is free and we are not already freezing */ + if (unlikely(type == IPIPE_TRACE_FREEZE) && + (trace_paths[cpu_id][frozen_path[cpu_id]].begin < 0) && + !(tp->flags & IPIPE_TFLG_FREEZING)) { + tp->post_trace = post_trace + 1; + tp->flags |= IPIPE_TFLG_FREEZING; + } + + /* enforce end of trace in case of overflow */ + if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) { + tp->end = pos; + goto enforce_end; + } + + /* stop tracing this path if we are in post-trace and + * a) that phase is over now or + * b) a new TRACE_BEGIN came in but we are not freezing this path */ + if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) || + ((type == IPIPE_TRACE_BEGIN) && + !(tp->flags & IPIPE_TFLG_FREEZING))))) { + /* store the path's end (i.e. excluding post-trace) */ + tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace); + + enforce_end: + if (tp->flags & IPIPE_TFLG_FREEZING) + tp = __ipipe_trace_freeze(cpu_id, tp, pos); + else + tp = __ipipe_trace_end(cpu_id, tp, pos); + + /* reset the active path, maybe already start a new one */ + tp->begin = (type == IPIPE_TRACE_BEGIN) ? 
+ WRAP_POINT_NO(tp->trace_pos - 1) : -1; + tp->end = -1; + tp->post_trace = 0; + tp->flags = 0; + + /* update active_path not earlier to avoid races with NMIs */ + active_path[cpu_id] = tp - trace_paths[cpu_id]; + } + + /* we still have old_tp and point, + * let's reset NMI lock and check for catches */ + old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK; + if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) { + /* well, this late tagging may not immediately be visible for + * other cpus already dumping this path - a minor issue */ + point->flags |= IPIPE_TFLG_NMI_HIT; + + /* handle deferred freezing from NMI context */ + if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) + __ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip, + old_tp->nmi_saved_parent_eip, + old_tp->nmi_saved_v); + } + + local_irq_restore_hw_notrace(flags); +} + +static unsigned long __ipipe_global_path_lock(void) +{ + unsigned long flags; + int cpu_id; + struct ipipe_trace_path *tp; + + spin_lock_irqsave(&global_path_lock, flags); + + cpu_id = ipipe_processor_id(); + restart: + tp = &trace_paths[cpu_id][active_path[cpu_id]]; + + /* here is small race window with NMIs - catched below */ + + /* clear NMI events and set lock (atomically per cpu) */ + tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT | + IPIPE_TFLG_NMI_FREEZE_REQ)) + | IPIPE_TFLG_NMI_LOCK; + + /* check active_path again - some nasty NMI may have switched + * it meanwhile */ + if (tp != &trace_paths[cpu_id][active_path[cpu_id]]) { + /* release lock on wrong path and restart */ + tp->flags &= ~IPIPE_TFLG_NMI_LOCK; + + /* there is no chance that the NMI got deferred + * => no need to check for pending freeze requests */ + goto restart; + } + + return flags; +} + +static void __ipipe_global_path_unlock(unsigned long flags) +{ + int cpu_id; + struct ipipe_trace_path *tp; + + /* release spinlock first - it's not involved in the NMI issue */ + __ipipe_spin_unlock_irqbegin(&global_path_lock); + + cpu_id = ipipe_processor_id(); + tp = 
&trace_paths[cpu_id][active_path[cpu_id]]; + + tp->flags &= ~IPIPE_TFLG_NMI_LOCK; + + /* handle deferred freezing from NMI context */ + if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) + __ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip, + tp->nmi_saved_parent_eip, tp->nmi_saved_v); + + /* See __ipipe_spin_lock_irqsave() and friends. */ + __ipipe_spin_unlock_irqcomplete(flags); +} + +void notrace ipipe_trace_begin(unsigned long v) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_BEGIN, __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, v); +} +EXPORT_SYMBOL(ipipe_trace_begin); + +void notrace ipipe_trace_end(unsigned long v) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_END, __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, v); +} +EXPORT_SYMBOL(ipipe_trace_end); + +void notrace ipipe_trace_freeze(unsigned long v) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_FREEZE, __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, v); +} +EXPORT_SYMBOL(ipipe_trace_freeze); + +void notrace ipipe_trace_special(unsigned char id, unsigned long v) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS), + __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, v); +} +EXPORT_SYMBOL(ipipe_trace_special); + +void notrace ipipe_trace_pid(pid_t pid, short prio) +{ + if (!ipipe_trace_enable) + return; + __ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS), + __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, pid); +} +EXPORT_SYMBOL(ipipe_trace_pid); + +int ipipe_trace_max_reset(void) +{ + int cpu_id; + unsigned long flags; + struct ipipe_trace_path *path; + int ret = 0; + + flags = __ipipe_global_path_lock(); + + for_each_online_cpu(cpu_id) { + path = &trace_paths[cpu_id][max_path[cpu_id]]; + + if (path->dump_lock) { + ret = -EBUSY; + break; + } + + path->begin = -1; + path->end = -1; + path->trace_pos = 0; + path->length = 0; + } + + 
__ipipe_global_path_unlock(flags); + + return ret; +} +EXPORT_SYMBOL(ipipe_trace_max_reset); + +int ipipe_trace_frozen_reset(void) +{ + int cpu_id; + unsigned long flags; + struct ipipe_trace_path *path; + int ret = 0; + + flags = __ipipe_global_path_lock(); + + for_each_online_cpu(cpu_id) { + path = &trace_paths[cpu_id][frozen_path[cpu_id]]; + + if (path->dump_lock) { + ret = -EBUSY; + break; + } + + path->begin = -1; + path->end = -1; + path->trace_pos = 0; + path->length = 0; + } + + __ipipe_global_path_unlock(flags); + + return ret; +} +EXPORT_SYMBOL(ipipe_trace_frozen_reset); + +static void +__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point, + int trylock) /* fills task_info with an 11-character, NUL-terminated field: command name (or "--") left-aligned, " <type >> IPIPE_TYPE_BITS> " right-aligned; task_info must hold at least 12 bytes; with trylock != 0 tasklist_lock is only try-locked and "--" is shown on contention */ +{ + struct task_struct *task = NULL; + char buf[8]; + int i; + int locked = 1; + + if (trylock) { + if (!read_trylock(&tasklist_lock)) + locked = 0; + } else + read_lock(&tasklist_lock); + + if (locked) + task = find_task_by_pid((pid_t)point->v); + + if (task) + strlcpy(task_info, task->comm, 12); /* at most 11 chars and always NUL-terminated; strncpy left long names unterminated for the strlen() below */ + else + strcpy(task_info, "--"); + + if (locked) + read_unlock(&tasklist_lock); + + for (i = strlen(task_info); i < 11; i++) + task_info[i] = ' '; + + snprintf(buf, sizeof(buf), " %d ", point->type >> IPIPE_TYPE_BITS); /* bounded: buf is only 8 bytes */ + strcpy(task_info + (11 - strlen(buf)), buf); +} + +#ifdef CONFIG_IPIPE_TRACE_PANIC +void ipipe_trace_panic_freeze(void) +{ + unsigned long flags; + int cpu_id; + + if (!ipipe_trace_enable) + return; + + ipipe_trace_enable = 0; + local_irq_save_hw_notrace(flags); + + cpu_id = ipipe_processor_id(); + + panic_path = &trace_paths[cpu_id][active_path[cpu_id]]; + + local_irq_restore_hw(flags); +} +EXPORT_SYMBOL(ipipe_trace_panic_freeze); + +void ipipe_trace_panic_dump(void) +{ + int cnt = back_trace; + int start, pos; + char task_info[12]; + + if (!panic_path) + return; + + printk("I-pipe tracer log (%d points):\n", cnt); + + start = pos = WRAP_POINT_NO(panic_path->trace_pos-1); + + while (cnt-- > 0) { + struct ipipe_trace_point *point = &panic_path->point[pos]; + long time; + char buf[16]; +
int i; + + printk(" %c", + (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); + + for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) + printk("%c", + (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? + '#' : '+') : + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? + '*' : ' ')); + + if (!point->eip) + printk("--\n"); + else { + __ipipe_trace_point_type(buf, point); + printk("%s", buf); /* never pass a variable buffer as the format string */ + + switch (point->type & IPIPE_TYPE_MASK) { + case IPIPE_TRACE_FUNC: + printk(" "); + break; + + case IPIPE_TRACE_PID: + __ipipe_get_task_info(task_info, + point, 1); + printk("%s", task_info); /* task_info holds task->comm, which may contain '%' */ + break; + + default: + printk("0x%08lx ", point->v); + } + + time = __ipipe_signed_tsc2us(point->timestamp - + panic_path->point[start].timestamp); + printk(" %5ld ", time); + + __ipipe_print_symname(NULL, point->eip); + printk(" ("); + __ipipe_print_symname(NULL, point->parent_eip); + printk(")\n"); + } + pos = WRAP_POINT_NO(pos - 1); + } + + panic_path = NULL; +} +EXPORT_SYMBOL(ipipe_trace_panic_dump); +#endif /* CONFIG_IPIPE_TRACE_PANIC */ + + +/* --- /proc output --- */ + +static notrace int __ipipe_in_critical_trpath(long point_no) +{ + return ((WRAP_POINT_NO(point_no-print_path->begin) < + WRAP_POINT_NO(print_path->end-print_path->begin)) || + ((print_path->end == print_path->begin) && + (WRAP_POINT_NO(point_no-print_path->end) > + print_post_trace))); +} + +static long __ipipe_signed_tsc2us(long long tsc) +{ + unsigned long long abs_tsc; + long us; + + /* ipipe_tsc2us works on unsigned => handle sign separately */ + abs_tsc = (tsc >= 0) ?
tsc : -tsc; + us = ipipe_tsc2us(abs_tsc); + if (tsc < 0) + return -us; + else + return us; +} + +static void +__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point) +{ + switch (point->type & IPIPE_TYPE_MASK) { + case IPIPE_TRACE_FUNC: + strcpy(buf, "func "); + break; + + case IPIPE_TRACE_BEGIN: + strcpy(buf, "begin "); + break; + + case IPIPE_TRACE_END: + strcpy(buf, "end "); + break; + + case IPIPE_TRACE_FREEZE: + strcpy(buf, "freeze "); + break; + + case IPIPE_TRACE_SPECIAL: + sprintf(buf, "(0x%02x) ", + point->type >> IPIPE_TYPE_BITS); + break; + + case IPIPE_TRACE_PID: + sprintf(buf, "[%5d] ", (pid_t)point->v); + break; + } +} + +static void +__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point) +{ + char mark = ' '; + int point_no = point - print_path->point; + int i; + + if (print_path->end == point_no) + mark = '<'; + else if (print_path->begin == point_no) + mark = '>'; + else if (__ipipe_in_critical_trpath(point_no)) + mark = ':'; + seq_printf(m, "%c%c", mark, + (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); + + if (!verbose_trace) + return; + + for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) + seq_printf(m, "%c", + (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? + '#' : '+') : + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' ')); +} + +static void +__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point) +{ + unsigned long delay = 0; + int next; + char *mark = " "; + + next = WRAP_POINT_NO(point+1 - print_path->point); + + if (next != print_path->trace_pos) + delay = ipipe_tsc2ns(print_path->point[next].timestamp - + point->timestamp); + + if (__ipipe_in_critical_trpath(point - print_path->point)) { + if (delay > IPIPE_DELAY_WARN) + mark = "! "; + else if (delay > IPIPE_DELAY_NOTE) + mark = "+ "; + } + seq_puts(m, mark); + + if (verbose_trace) + seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000, + (point->flags & IPIPE_TFLG_NMI_HIT) ? 
'N' : ' '); + else + seq_puts(m, " "); +} + +static void __ipipe_print_symname(struct seq_file *m, unsigned long eip) +{ + char namebuf[KSYM_NAME_LEN+1]; + unsigned long size, offset; + const char *sym_name; + char *modname; + + sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf); + +#ifdef CONFIG_IPIPE_TRACE_PANIC + if (!m) { + /* panic dump: no seq_file, print via printk; nothing is printed when the symbol is unknown */ + if (sym_name) { + printk("%s+0x%lx", sym_name, offset); + if (modname) + printk(" [%s]", modname); + } + } else +#endif /* CONFIG_IPIPE_TRACE_PANIC */ + { + if (sym_name) { + if (verbose_trace) { + seq_printf(m, "%s+0x%lx", sym_name, offset); + if (modname) + seq_printf(m, " [%s]", modname); + } else + seq_puts(m, sym_name); + } else + seq_printf(m, "<%08lx>", eip); + } +} + +#if defined(CONFIG_XENO_OPT_DEBUG) || defined(CONFIG_DEBUG_PREEMPT) +static void __ipipe_print_dbgwarning(struct seq_file *m) /* emits a banner listing the enabled debug options that increase the observed latencies */ +{ + seq_puts(m, "\n******** WARNING ********\n" + "The following debugging options will increase the observed " + "latencies:\n" +#ifdef CONFIG_XENO_OPT_DEBUG + " o CONFIG_XENO_OPT_DEBUG\n" +#endif /* CONFIG_XENO_OPT_DEBUG */ +#ifdef CONFIG_XENO_OPT_DEBUG_QUEUES + " o CONFIG_XENO_OPT_DEBUG_QUEUES (very costly)\n" +#endif /* CONFIG_XENO_OPT_DEBUG_QUEUES */ +#ifdef CONFIG_DEBUG_PREEMPT + " o CONFIG_DEBUG_PREEMPT\n" +#endif /* CONFIG_DEBUG_PREEMPT */ + "\n"); +} +#else /* !(CONFIG_XENO_OPT_DEBUG || CONFIG_DEBUG_PREEMPT): the warning compiles away */ +# define __ipipe_print_dbgwarning(m) +#endif /* CONFIG_XENO_OPT_DEBUG || CONFIG_DEBUG_PREEMPT */ + +static void __ipipe_print_headline(struct seq_file *m) +{ + seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu " + "us\n\n", trace_overhead/1000, trace_overhead%1000); + + if (verbose_trace) { + const char *name[4] = { [0 ...
3] = "" }; + struct list_head *pos; + int i = 0; + + list_for_each_prev(pos, &__ipipe_pipeline) { + struct ipipe_domain *ipd = + list_entry(pos, struct ipipe_domain, p_link); + + name[i] = ipd->name; + if (++i > 3) + break; + } + + seq_printf(m, + " +----- Hard IRQs ('|': locked)\n" + " |+---- %s\n" + " ||+--- %s\n" + " |||+-- %s\n" + " ||||+- %s%s\n" + " ||||| +---------- " + "Delay flag ('+': > %d us, '!': > %d us)\n" + " ||||| | +- " + "NMI noise ('N')\n" + " ||||| | |\n" + " Type User Val. Time Delay Function " + "(Parent)\n", + name[3], name[2], name[1], name[0], + name[0] ? " ('*': domain stalled, '+': current, " + "'#': current+stalled)" : "", + IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); + } else + seq_printf(m, + " +--------------- Hard IRQs ('|': locked)\n" + " | +- Delay flag " + "('+': > %d us, '!': > %d us)\n" + " | |\n" + " Type Time Function (Parent)\n", + IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); +} + +static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos) +{ + loff_t n = *pos; + + mutex_lock(&out_mutex); + + if (!n) { + struct ipipe_trace_path *path; + unsigned long length_usecs; + int points, i; + unsigned long flags; + + /* protect against max_path/frozen_path updates while we + * haven't locked our target path, also avoid recursively + * taking global_path_lock from NMI context */ + flags = __ipipe_global_path_lock(); + + /* find the longest of all per-cpu paths */ + print_path = NULL; + for_each_online_cpu(i) { + path = &trace_paths[i][max_path[i]]; + if ((print_path == NULL) || + (path->length > print_path->length)) + print_path = path; + } + print_path->dump_lock = 1; + + __ipipe_global_path_unlock(flags); + + /* does this path actually contain data? 
*/ + if (print_path->end == print_path->begin) + return NULL; + + /* number of points inside the critical path */ + points = WRAP_POINT_NO(print_path->end-print_path->begin+1); + + /* pre- and post-tracing length, post-trace length was frozen + in __ipipe_trace, pre-trace may have to be reduced due to + buffer overrun */ + print_pre_trace = pre_trace; + print_post_trace = WRAP_POINT_NO(print_path->trace_pos - + print_path->end - 1); + if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) + print_pre_trace = IPIPE_TRACE_POINTS - 1 - points - + print_post_trace; + + length_usecs = ipipe_tsc2us(print_path->length); + seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe-%s\n" + "------------------------------------------------------------\n", + UTS_RELEASE, IPIPE_ARCH_STRING); + __ipipe_print_dbgwarning(m); + seq_printf(m, "Begin: %lld cycles, Trace Points: %d (-%d/+%d), " + "Length: %lu us\n", + print_path->point[print_path->begin].timestamp, + points, print_pre_trace, print_post_trace, length_usecs); + __ipipe_print_headline(m); + } + + /* check if we are inside the trace range */ + if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + + print_pre_trace + print_post_trace)) + return NULL; + + /* return the next point to be shown */ + return &print_path->point[WRAP_POINT_NO(print_path->begin - + print_pre_trace + n)]; +} + +static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos) +{ + loff_t n = ++*pos; + + /* check if we are inside the trace range with the next entry */ + if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + + print_pre_trace + print_post_trace)) + return NULL; + + /* return the next point to be shown */ + return &print_path->point[WRAP_POINT_NO(print_path->begin - + print_pre_trace + *pos)]; +} + +static void __ipipe_prtrace_stop(struct seq_file *m, void *p) +{ + if (print_path) + print_path->dump_lock = 0; + mutex_unlock(&out_mutex); +} + +static int __ipipe_prtrace_show(struct seq_file *m, 
void *p) +{ + long time; + struct ipipe_trace_point *point = p; + char buf[16]; + + if (!point->eip) { + seq_puts(m, "--\n"); + return 0; + } + + __ipipe_print_pathmark(m, point); + __ipipe_trace_point_type(buf, point); + seq_puts(m, buf); + if (verbose_trace) + switch (point->type & IPIPE_TYPE_MASK) { + case IPIPE_TRACE_FUNC: + seq_puts(m, " "); + break; + + case IPIPE_TRACE_PID: + __ipipe_get_task_info(buf, point, 0); + seq_puts(m, buf); + break; + + default: + seq_printf(m, "0x%08lx ", point->v); + } + + time = __ipipe_signed_tsc2us(point->timestamp - + print_path->point[print_path->begin].timestamp); + seq_printf(m, "%5ld", time); + + __ipipe_print_delay(m, point); + __ipipe_print_symname(m, point->eip); + seq_puts(m, " ("); + __ipipe_print_symname(m, point->parent_eip); + seq_puts(m, ")\n"); + + return 0; +} + +static struct seq_operations __ipipe_max_ptrace_ops = { + .start = __ipipe_max_prtrace_start, + .next = __ipipe_prtrace_next, + .stop = __ipipe_prtrace_stop, + .show = __ipipe_prtrace_show +}; + +static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &__ipipe_max_ptrace_ops); +} + +static ssize_t +__ipipe_max_reset(struct file *file, const char __user *pbuffer, + size_t count, loff_t *data) +{ + mutex_lock(&out_mutex); + ipipe_trace_max_reset(); + mutex_unlock(&out_mutex); + + return count; +} + +struct file_operations __ipipe_max_prtrace_fops = { + .open = __ipipe_max_prtrace_open, + .read = seq_read, + .write = __ipipe_max_reset, + .llseek = seq_lseek, + .release = seq_release, +}; + +static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos) +{ + loff_t n = *pos; + + mutex_lock(&out_mutex); + + if (!n) { + struct ipipe_trace_path *path; + int i; + unsigned long flags; + + /* protect against max_path/frozen_path updates while we + * haven't locked our target path, also avoid recursively + * taking global_path_lock from NMI context */ + flags = __ipipe_global_path_lock(); + + /* find the 
first of all per-cpu frozen paths */ + print_path = NULL; + for_each_online_cpu(i) { + path = &trace_paths[i][frozen_path[i]]; + if (path->end >= 0) + print_path = path; + } + if (print_path) + print_path->dump_lock = 1; + + __ipipe_global_path_unlock(flags); + + if (!print_path) + return NULL; + + /* back- and post-tracing length, post-trace length was frozen + in __ipipe_trace, back-trace may have to be reduced due to + buffer overrun */ + print_pre_trace = back_trace-1; /* subtract freeze point */ + print_post_trace = WRAP_POINT_NO(print_path->trace_pos - + print_path->end - 1); + if (1+print_pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) /* clamp the value actually used below (derived from back_trace), not the max-path pre_trace setting */ + print_pre_trace = IPIPE_TRACE_POINTS - 2 - + print_post_trace; + + seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe-%s\n" + "------------------------------------------------------" + "------\n", + UTS_RELEASE, IPIPE_ARCH_STRING); + __ipipe_print_dbgwarning(m); + seq_printf(m, "Freeze: %lld cycles, Trace Points: %d (+%d)\n", + print_path->point[print_path->begin].timestamp, + print_pre_trace+1, print_post_trace); + __ipipe_print_headline(m); + } + + /* check if we are inside the trace range */ + if (n >= print_pre_trace + 1 + print_post_trace) + return NULL; + + /* return the next point to be shown */ + return &print_path->point[WRAP_POINT_NO(print_path->begin- + print_pre_trace+n)]; +} + +static struct seq_operations __ipipe_frozen_ptrace_ops = { + .start = __ipipe_frozen_prtrace_start, + .next = __ipipe_prtrace_next, + .stop = __ipipe_prtrace_stop, + .show = __ipipe_prtrace_show +}; + +static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &__ipipe_frozen_ptrace_ops); +} + +static ssize_t +__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer, + size_t count, loff_t *data) +{ + char *end, buf[16]; + int val; + int n; + + n = (count > sizeof(buf) - 1) ?
sizeof(buf) - 1 : count; + + if (copy_from_user(buf, pbuffer, n)) + return -EFAULT; + + buf[n] = '\0'; + val = simple_strtol(buf, &end, 0); + + if (((*end != '\0') && !isspace(*end)) || (val < 0)) + return -EINVAL; + + mutex_lock(&out_mutex); + ipipe_trace_frozen_reset(); + if (val > 0) + ipipe_trace_freeze(-1); + mutex_unlock(&out_mutex); + + return count; +} + +struct file_operations __ipipe_frozen_prtrace_fops = { + .open = __ipipe_frozen_prtrace_open, + .read = seq_read, + .write = __ipipe_frozen_ctrl, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __ipipe_rd_proc_val(char *page, char **start, off_t off, + int count, int *eof, void *data) /* read_proc handler: formats the int behind data as "<value>\n" into page and returns how many bytes are available at offset off, capped to count */ +{ + int len; + + len = sprintf(page, "%u\n", *(int *)data); + len -= off; /* len is now the number of bytes remaining past off */ + if (len <= count) /* EOF only if everything remaining fits into this read; off was already subtracted above */ + *eof = 1; + *start = page + off; + if (len > count) + len = count; + if (len < 0) + len = 0; + + return len; +} + +static int __ipipe_wr_proc_val(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char *end, buf[16]; + int val; + int n; + + n = (count > sizeof(buf) - 1) ?
sizeof(buf) - 1 : count; + + if (copy_from_user(buf, buffer, n)) + return -EFAULT; + + buf[n] = '\0'; + val = simple_strtol(buf, &end, 0); + + if (((*end != '\0') && !isspace(*end)) || (val < 0)) + return -EINVAL; + + mutex_lock(&out_mutex); + *(int *)data = val; + mutex_unlock(&out_mutex); + + return count; +} + +extern struct proc_dir_entry *ipipe_proc_root; + +static void __init +__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir, + const char *name, int *value_ptr) +{ + struct proc_dir_entry *entry; + + entry = create_proc_entry(name, 0644, trace_dir); + if (entry) { + entry->data = value_ptr; + entry->read_proc = __ipipe_rd_proc_val; + entry->write_proc = __ipipe_wr_proc_val; + entry->owner = THIS_MODULE; + } +} + +void __init __ipipe_init_tracer(void) +{ + struct proc_dir_entry *trace_dir; + struct proc_dir_entry *entry; + unsigned long long start, end, min = ULLONG_MAX; + int i; +#ifdef CONFIG_IPIPE_TRACE_VMALLOC + int cpu, path; + + for_each_possible_cpu(cpu) { + trace_paths[cpu] = vmalloc( + sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS); + if (trace_paths[cpu] == NULL) { + printk(KERN_ERR "I-pipe: " + "insufficient memory for trace buffer.\n"); + return; + } + memset(trace_paths[cpu], 0, + sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS); + for (path = 0; path < IPIPE_TRACE_PATHS; path++) { + trace_paths[cpu][path].begin = -1; + trace_paths[cpu][path].end = -1; + } + } +#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ + ipipe_trace_enable = CONFIG_IPIPE_TRACE_ENABLE_VALUE; + + /* Calculate minimum overhead of __ipipe_trace() */ + local_irq_disable_hw(); + for (i = 0; i < 100; i++) { + ipipe_read_tsc(start); + __ipipe_trace(IPIPE_TRACE_FUNC, __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, 0); + ipipe_read_tsc(end); + + end -= start; + if (end < min) + min = end; + } + local_irq_enable_hw(); + trace_overhead = ipipe_tsc2ns(min); + + trace_dir = create_proc_entry("trace", S_IFDIR, ipipe_proc_root); + + entry = create_proc_entry("max", 
0644, trace_dir); + if (entry) + entry->proc_fops = &__ipipe_max_prtrace_fops; + + entry = create_proc_entry("frozen", 0644, trace_dir); + if (entry) + entry->proc_fops = &__ipipe_frozen_prtrace_fops; + + __ipipe_create_trace_proc_val(trace_dir, "pre_trace_points", + &pre_trace); + __ipipe_create_trace_proc_val(trace_dir, "post_trace_points", + &post_trace); + __ipipe_create_trace_proc_val(trace_dir, "back_trace_points", + &back_trace); + __ipipe_create_trace_proc_val(trace_dir, "verbose", + &verbose_trace); + __ipipe_create_trace_proc_val(trace_dir, "enable", + &ipipe_trace_enable); +} diff -Naur linux-2.6.21.5.orig/kernel/irq/chip.c linux-2.6.21.5/kernel/irq/chip.c --- linux-2.6.21.5.orig/kernel/irq/chip.c 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/kernel/irq/chip.c 2007-08-07 10:56:22.000000000 +0200 @@ -337,7 +337,9 @@ irqreturn_t action_ret; spin_lock(&desc->lock); +#ifndef CONFIG_IPIPE mask_ack_irq(desc, irq); +#endif /* CONFIG_IPIPE */ if (unlikely(desc->status & IRQ_INPROGRESS)) goto out_unlock; @@ -417,8 +419,13 @@ spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; +#ifdef CONFIG_IPIPE + desc->chip->unmask(irq); +#endif out: +#ifndef CONFIG_IPIPE desc->chip->eoi(irq); +#endif spin_unlock(&desc->lock); } @@ -462,8 +469,10 @@ kstat_cpu(cpu).irqs[irq]++; +#ifndef CONFIG_IPIPE /* Start handling the irq */ desc->chip->ack(irq); +#endif /* CONFIG_IPIPE */ /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; @@ -503,6 +512,70 @@ spin_unlock(&desc->lock); } +#ifdef CONFIG_IPIPE + +void fastcall __ipipe_ack_simple_irq(unsigned irq, struct irq_desc *desc) +{ +} + +void fastcall __ipipe_end_simple_irq(unsigned irq, struct irq_desc *desc) +{ +} + +void fastcall __ipipe_ack_level_irq(unsigned irq, struct irq_desc *desc) +{ + mask_ack_irq(desc, irq); +} + +void fastcall __ipipe_end_level_irq(unsigned irq, struct irq_desc *desc) +{ + if (desc->chip->unmask) + desc->chip->unmask(irq); +} + +void fastcall 
__ipipe_ack_fasteoi_irq(unsigned irq, struct irq_desc *desc) +{ + desc->chip->mask(irq); + desc->chip->eoi(irq); +} + +void fastcall __ipipe_end_fasteoi_irq(unsigned irq, struct irq_desc *desc) +{ + desc->chip->unmask(irq); +} + +void fastcall __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc) +{ + desc->chip->ack(irq); +} + +void fastcall __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc) +{ +} + +void fastcall __ipipe_ack_bad_irq(unsigned irq, struct irq_desc *desc) +{ + static int done; + + handle_bad_irq(irq, desc); + + if (!done) { + printk(KERN_WARNING "%s: unknown flow handler for IRQ %d\n", + __FUNCTION__, irq); + done = 1; + } +} + +void fastcall __ipipe_noack_irq(unsigned irq, struct irq_desc *desc) +{ +} + +void fastcall __ipipe_noend_irq(unsigned irq, struct irq_desc *desc) +{ +} + +#endif /* CONFIG_IPIPE */ + #ifdef CONFIG_SMP /** * handle_percpu_IRQ - Per CPU local irq handler @@ -518,8 +591,10 @@ kstat_this_cpu.irqs[irq]++; +#ifndef CONFIG_IPIPE if (desc->chip->ack) desc->chip->ack(irq); +#endif /* CONFIG_IPIPE */ action_ret = handle_IRQ_event(irq, desc->action); if (!noirqdebug) @@ -529,6 +604,22 @@ desc->chip->eoi(irq); } +#ifdef CONFIG_IPIPE + +void fastcall __ipipe_ack_percpu_irq(unsigned irq, struct irq_desc *desc) +{ + if (desc->chip->ack) + desc->chip->ack(irq); +} + +void fastcall __ipipe_end_percpu_irq(unsigned irq, struct irq_desc *desc) +{ + if (desc->chip->eoi) + desc->chip->eoi(irq); +} + +#endif /* CONFIG_IPIPE */ + #endif /* CONFIG_SMP */ void @@ -548,6 +639,30 @@ if (!handle) handle = handle_bad_irq; +#ifdef CONFIG_IPIPE + else if (handle == &handle_simple_irq) { + desc->ipipe_ack = &__ipipe_ack_simple_irq; + desc->ipipe_end = &__ipipe_end_simple_irq; + } + else if (handle == &handle_level_irq) { + desc->ipipe_ack = &__ipipe_ack_level_irq; + desc->ipipe_end = &__ipipe_end_level_irq; + } + else if (handle == &handle_edge_irq) { + desc->ipipe_ack = &__ipipe_ack_edge_irq; + desc->ipipe_end = &__ipipe_end_edge_irq; + } + 
else if (handle == &handle_fasteoi_irq) { + desc->ipipe_ack = &__ipipe_ack_fasteoi_irq; + desc->ipipe_end = &__ipipe_end_fasteoi_irq; + } +#ifdef CONFIG_SMP + else if (handle == &handle_percpu_irq) { + desc->ipipe_ack = &__ipipe_ack_percpu_irq; + desc->ipipe_end = &__ipipe_end_percpu_irq; + } +#endif /* CONFIG_SMP */ +#endif /* CONFIG_IPIPE */ else if (desc->chip == &no_irq_chip) { printk(KERN_WARNING "Trying to install %sinterrupt handler " "for IRQ%d\n", is_chained ? "chained " : "", irq); @@ -559,7 +674,17 @@ * dummy_irq_chip for easy transition. */ desc->chip = &dummy_irq_chip; +#ifdef CONFIG_IPIPE + desc->ipipe_ack = &__ipipe_noack_irq; + desc->ipipe_end = &__ipipe_noend_irq; +#endif /* CONFIG_IPIPE */ } +#ifdef CONFIG_IPIPE + else { + desc->ipipe_ack = &__ipipe_ack_bad_irq; + desc->ipipe_end = &__ipipe_noend_irq; + } +#endif /* CONFIG_IPIPE */ spin_lock_irqsave(&desc->lock, flags); @@ -569,6 +694,10 @@ mask_ack_irq(desc, irq); desc->status |= IRQ_DISABLED; desc->depth = 1; +#ifdef CONFIG_IPIPE + desc->ipipe_ack = &__ipipe_ack_bad_irq; + desc->ipipe_end = &__ipipe_noend_irq; +#endif /* CONFIG_IPIPE */ } desc->handle_irq = handle; desc->name = name; diff -Naur linux-2.6.21.5.orig/kernel/Makefile linux-2.6.21.5/kernel/Makefile --- linux-2.6.21.5.orig/kernel/Makefile 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/kernel/Makefile 2007-08-07 10:56:22.000000000 +0200 @@ -47,6 +47,7 @@ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_RELAY) += relay.o +obj-$(CONFIG_IPIPE) += ipipe/ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_UTS_NS) += utsname.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff -Naur linux-2.6.21.5.orig/kernel/printk.c linux-2.6.21.5/kernel/printk.c --- linux-2.6.21.5.orig/kernel/printk.c 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/kernel/printk.c 2007-08-07 10:56:22.000000000 +0200 @@ -479,6 +479,41 @@ return 0; } +#ifdef CONFIG_IPIPE + +static ipipe_spinlock_t 
__ipipe_printk_lock = IPIPE_SPIN_LOCK_UNLOCKED; + +static int __ipipe_printk_fill; + +static char __ipipe_printk_buf[__LOG_BUF_LEN]; + +void __ipipe_flush_printk (unsigned virq, void *cookie) +{ + char *p = __ipipe_printk_buf; + int len, lmax, out = 0; + unsigned long flags; + + goto start; + + do { + spin_unlock_irqrestore(&__ipipe_printk_lock, flags); + start: + lmax = __ipipe_printk_fill; + while (out < lmax) { + len = strlen(p) + 1; + printk("%s",p); + p += len; + out += len; + } + spin_lock_irqsave(&__ipipe_printk_lock, flags); + } + while (__ipipe_printk_fill != lmax); + + __ipipe_printk_fill = 0; + + spin_unlock_irqrestore(&__ipipe_printk_lock, flags); +} + /** * printk - print a kernel message * @fmt: format string @@ -501,6 +536,43 @@ asmlinkage int printk(const char *fmt, ...) { + int r, fbytes, oldcount; + unsigned long flags; + va_list args; + + va_start(args, fmt); + + if (ipipe_current_domain == ipipe_root_domain || + test_bit(IPIPE_SPRINTK_FLAG,&ipipe_current_domain->flags) || + oops_in_progress) { + r = vprintk(fmt, args); + goto out; + } + + spin_lock_irqsave(&__ipipe_printk_lock, flags); + + oldcount = __ipipe_printk_fill; + fbytes = __LOG_BUF_LEN - oldcount; + + if (fbytes > 1) { + r = vscnprintf(__ipipe_printk_buf + __ipipe_printk_fill, + fbytes, fmt, args) + 1; /* account for the null byte */ + __ipipe_printk_fill += r; + } else + r = 0; + + spin_unlock_irqrestore(&__ipipe_printk_lock, flags); + + if (oldcount == 0) + ipipe_trigger_irq(__ipipe_printk_virq); +out: + va_end(args); + + return r; +} +#else /* !CONFIG_IPIPE */ +asmlinkage int printk(const char *fmt, ...) 
+{ va_list args; int r; @@ -510,6 +582,7 @@ return r; } +#endif /* CONFIG_IPIPE */ /* cpu currently holding logbuf_lock */ static volatile unsigned int printk_cpu = UINT_MAX; diff -Naur linux-2.6.21.5.orig/kernel/sched.c linux-2.6.21.5/kernel/sched.c --- linux-2.6.21.5.orig/kernel/sched.c 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/kernel/sched.c 2007-08-07 10:56:22.000000000 +0200 @@ -1419,7 +1419,7 @@ rq = task_rq_lock(p, &flags); old_state = p->state; - if (!(old_state & state)) + if (!(old_state & state) || (old_state & TASK_NOWAKEUP)) goto out; if (p->array) @@ -1840,6 +1840,8 @@ #endif if (current->set_child_tid) put_user(current->pid, current->set_child_tid); + + ipipe_init_notify(current); } /* @@ -3289,12 +3291,15 @@ long *switch_count; struct rq *rq; + ipipe_check_context(ipipe_root_domain); + /* * Test if we are atomic. Since do_exit() needs to call into * schedule() atomically, we ignore that path for now. * Otherwise, whine if we are scheduling when we should not be. 
*/ - if (unlikely(in_atomic() && !current->exit_state)) { + if (unlikely(!(current->state & TASK_ATOMICSWITCH) && in_atomic() && + !current->exit_state)) { printk(KERN_ERR "BUG: scheduling while atomic: " "%s/0x%08x/%d\n", current->comm, preempt_count(), current->pid); @@ -3305,8 +3310,13 @@ } profile_hit(SCHED_PROFILING, __builtin_return_address(0)); + if (unlikely(current->state & TASK_ATOMICSWITCH)) { + current->state &= ~TASK_ATOMICSWITCH; + goto need_resched_nodisable; + } need_resched: preempt_disable(); +need_resched_nodisable: prev = current; release_kernel_lock(prev); need_resched_nonpreemptible: @@ -3421,6 +3431,8 @@ prepare_task_switch(rq, next); prev = context_switch(rq, prev, next); barrier(); + if (task_hijacked(prev)) + return; /* * this_rq must be evaluated again because prev may have moved * CPUs since it called schedule(), thus the 'rq' on its stack @@ -3434,7 +3446,7 @@ if (unlikely(reacquire_kernel_lock(prev) < 0)) goto need_resched_nonpreemptible; preempt_enable_no_resched(); - if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) + if (unlikely(test_thread_flag(TIF_NEED_RESCHED) && ipipe_root_domain_p)) goto need_resched; } EXPORT_SYMBOL(schedule); @@ -3452,6 +3464,7 @@ struct task_struct *task = current; int saved_lock_depth; #endif + ipipe_check_context(ipipe_root_domain); /* * If there is a non-zero preempt_count or interrupts are disabled, * we do not want to preempt the current task. Just return.. 
@@ -4149,6 +4162,7 @@ deactivate_task(p, rq); oldprio = p->prio; __setscheduler(p, policy, param->sched_priority); + ipipe_setsched_notify(p); if (array) { __activate_task(p, rq); /* @@ -6886,3 +6900,50 @@ } #endif + +#ifdef CONFIG_IPIPE + +int ipipe_setscheduler_root (struct task_struct *p, int policy, int prio) +{ + struct prio_array *array; + unsigned long flags; + struct rq *rq; + int oldprio; + + rq = task_rq_lock(p, &flags); + array = p->array; + if (array) + deactivate_task(p, rq); + oldprio = p->prio; + __setscheduler(p, policy, prio); + if (array) { + __activate_task(p, rq); + if (task_running(rq, p)) { + if (p->prio > oldprio) + resched_task(rq->curr); + } else if (TASK_PREEMPTS_CURR(p, rq)) + resched_task(rq->curr); + } + task_rq_unlock(rq, &flags); + + return 0; +} + +EXPORT_SYMBOL(ipipe_setscheduler_root); + +int ipipe_reenter_root (struct task_struct *prev, int policy, int prio) +{ + finish_task_switch(this_rq(), prev); + if (reacquire_kernel_lock(current) < 0) + ; + preempt_enable_no_resched(); + + if (current->policy != policy || current->rt_priority != prio) + return ipipe_setscheduler_root(current,policy,prio); + + return 0; +} + +EXPORT_SYMBOL(ipipe_reenter_root); + +#endif /* CONFIG_IPIPE */ diff -Naur linux-2.6.21.5.orig/kernel/signal.c linux-2.6.21.5/kernel/signal.c --- linux-2.6.21.5.orig/kernel/signal.c 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/kernel/signal.c 2007-08-07 10:56:22.000000000 +0200 @@ -532,6 +532,7 @@ unsigned int mask; set_tsk_thread_flag(t, TIF_SIGPENDING); + ipipe_sigwake_notify(t); /* TIF_SIGPENDING must be set first. */ /* * For SIGKILL, we want to wake it up in the stopped/traced case. 
diff -Naur linux-2.6.21.5.orig/kernel/spinlock.c linux-2.6.21.5/kernel/spinlock.c --- linux-2.6.21.5.orig/kernel/spinlock.c 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/kernel/spinlock.c 2007-08-07 10:56:22.000000000 +0200 @@ -88,7 +88,7 @@ * _raw_spin_lock_flags() code, because lockdep assumes * that interrupts are not re-enabled during lock-acquire: */ -#ifdef CONFIG_PROVE_LOCKING +#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_IPIPE) _raw_spin_lock(lock); #else _raw_spin_lock_flags(lock, &flags); @@ -305,7 +305,7 @@ * _raw_spin_lock_flags() code, because lockdep assumes * that interrupts are not re-enabled during lock-acquire: */ -#ifdef CONFIG_PROVE_SPIN_LOCKING +#if defined(CONFIG_PROVE_SPIN_LOCKING) || defined(CONFIG_IPIPE) _raw_spin_lock(lock); #else _raw_spin_lock_flags(lock, &flags); diff -Naur linux-2.6.21.5.orig/kernel/sys.c linux-2.6.21.5/kernel/sys.c --- linux-2.6.21.5.orig/kernel/sys.c 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/kernel/sys.c 2007-08-07 10:56:22.000000000 +0200 @@ -227,9 +227,11 @@ { int ret; - rcu_read_lock(); + if (ipipe_root_domain_p) + rcu_read_lock(); ret = notifier_call_chain(&nh->head, val, v); - rcu_read_unlock(); + if (ipipe_root_domain_p) + rcu_read_unlock(); return ret; } diff -Naur linux-2.6.21.5.orig/lib/bust_spinlocks.c linux-2.6.21.5/lib/bust_spinlocks.c --- linux-2.6.21.5.orig/lib/bust_spinlocks.c 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/lib/bust_spinlocks.c 2007-08-07 10:56:22.000000000 +0200 @@ -12,13 +12,16 @@ #include #include #include +#include void __attribute__((weak)) bust_spinlocks(int yes) { if (yes) { + ipipe_trace_panic_freeze(); oops_in_progress = 1; } else { + ipipe_trace_panic_dump(); #ifdef CONFIG_VT unblank_screen(); #endif diff -Naur linux-2.6.21.5.orig/lib/ioremap.c linux-2.6.21.5/lib/ioremap.c --- linux-2.6.21.5.orig/lib/ioremap.c 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/lib/ioremap.c 2007-08-07 10:56:22.000000000 +0200 @@ -84,8 +84,8 @@ if (err) 
break; } while (pgd++, addr = next, addr != end); - - flush_cache_vmap(start, end); + __ipipe_pin_range_globally(start, end); + flush_cache_vmap(start, end); return err; } diff -Naur linux-2.6.21.5.orig/lib/Kconfig.debug linux-2.6.21.5/lib/Kconfig.debug --- linux-2.6.21.5.orig/lib/Kconfig.debug 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/lib/Kconfig.debug 2007-08-07 10:56:22.000000000 +0200 @@ -71,6 +71,8 @@ exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in your build tree), to make sure they're suitable. +source "kernel/ipipe/Kconfig.debug" + config DEBUG_KERNEL bool "Kernel debugging" help diff -Naur linux-2.6.21.5.orig/lib/smp_processor_id.c linux-2.6.21.5/lib/smp_processor_id.c --- linux-2.6.21.5.orig/lib/smp_processor_id.c 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/lib/smp_processor_id.c 2007-08-07 10:56:22.000000000 +0200 @@ -13,6 +13,9 @@ int this_cpu = raw_smp_processor_id(); cpumask_t this_mask; + if (!ipipe_root_domain_p) + goto out; + if (likely(preempt_count)) goto out; diff -Naur linux-2.6.21.5.orig/lib/spinlock_debug.c linux-2.6.21.5/lib/spinlock_debug.c --- linux-2.6.21.5.orig/lib/spinlock_debug.c 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/lib/spinlock_debug.c 2007-08-07 10:56:22.000000000 +0200 @@ -133,6 +133,8 @@ debug_spin_lock_after(lock); } +EXPORT_SYMBOL(_raw_spin_lock); + int _raw_spin_trylock(spinlock_t *lock) { int ret = __raw_spin_trylock(&lock->raw_lock); @@ -148,12 +150,16 @@ return ret; } +EXPORT_SYMBOL(_raw_spin_trylock); + void _raw_spin_unlock(spinlock_t *lock) { debug_spin_unlock(lock); __raw_spin_unlock(&lock->raw_lock); } +EXPORT_SYMBOL(_raw_spin_unlock); + static void rwlock_bug(rwlock_t *lock, const char *msg) { if (!debug_locks_off()) @@ -199,6 +205,8 @@ __raw_read_lock(&lock->raw_lock); } +EXPORT_SYMBOL(_raw_read_lock); + int _raw_read_trylock(rwlock_t *lock) { int ret = __raw_read_trylock(&lock->raw_lock); @@ -212,12 +220,16 @@ return ret; } +EXPORT_SYMBOL(_raw_read_trylock); + void 
_raw_read_unlock(rwlock_t *lock) { RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); __raw_read_unlock(&lock->raw_lock); } +EXPORT_SYMBOL(_raw_read_unlock); + static inline void debug_write_lock_before(rwlock_t *lock) { RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); @@ -275,6 +287,8 @@ debug_write_lock_after(lock); } +EXPORT_SYMBOL(_raw_write_lock); + int _raw_write_trylock(rwlock_t *lock) { int ret = __raw_write_trylock(&lock->raw_lock); @@ -290,8 +304,12 @@ return ret; } +EXPORT_SYMBOL(_raw_write_trylock); + void _raw_write_unlock(rwlock_t *lock) { debug_write_unlock(lock); __raw_write_unlock(&lock->raw_lock); } + +EXPORT_SYMBOL(_raw_write_unlock); diff -Naur linux-2.6.21.5.orig/Makefile linux-2.6.21.5/Makefile --- linux-2.6.21.5.orig/Makefile 2007-08-07 10:55:04.000000000 +0200 +++ linux-2.6.21.5/Makefile 2007-08-07 10:56:22.000000000 +0200 @@ -490,6 +490,10 @@ include $(srctree)/arch/$(ARCH)/Makefile +ifdef CONFIG_IPIPE_TRACE_MCOUNT +CFLAGS += -pg +endif + ifdef CONFIG_FRAME_POINTER CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) else diff -Naur linux-2.6.21.5.orig/mm/memory.c linux-2.6.21.5/mm/memory.c --- linux-2.6.21.5.orig/mm/memory.c 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/mm/memory.c 2007-08-07 10:56:22.000000000 +0200 @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -418,13 +419,41 @@ return pfn_to_page(pfn); } +static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma) +{ + /* + * If the source page was a PFN mapping, we don't have + * a "struct page" for it. We do a best-effort copy by + * just copying from the original user address. If that + * fails, we just zero-fill it. Live with it. 
+ */ + if (unlikely(!src)) { + void *kaddr = kmap_atomic(dst, KM_USER0); + void __user *uaddr = (void __user *)(va & PAGE_MASK); + + /* + * This really shouldn't fail, because the page is there + * in the page tables. But it might just be unreadable, + * in which case we just give up and fill the result with + * zeroes. + */ + if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) + memset(kaddr, 0, PAGE_SIZE); + kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(dst); + return; + + } + copy_user_highpage(dst, src, va, vma); +} + /* * copy one vm_area from one task to the other. Assumes the page tables * already present in the new task to be cleared in the whole range * covered by this vma. */ -static inline void +static inline int copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, unsigned long addr, int *rss) @@ -466,6 +495,25 @@ * in the parent and the child */ if (is_cow_mapping(vm_flags)) { +#ifdef CONFIG_IPIPE + if (((vm_flags|src_mm->def_flags) & (VM_LOCKED|VM_PINNED)) == (VM_LOCKED|VM_PINNED)) { + struct page *old_page = vm_normal_page(vma, addr, pte); + page = alloc_page_vma(GFP_HIGHUSER, vma, addr); + if (!page) + return -ENOMEM; + + cow_user_page(page, old_page, addr, vma); + pte = mk_pte(page, vma->vm_page_prot); + + if (vm_flags & VM_SHARED) + pte = pte_mkclean(pte); + pte = pte_mkold(pte); + + page_dup_rmap(page); + rss[!!PageAnon(page)]++; + goto out_set_pte; + } +#endif /* CONFIG_IPIPE */ ptep_set_wrprotect(src_mm, addr, src_pte); pte = pte_wrprotect(pte); } @@ -487,6 +535,7 @@ out_set_pte: set_pte_at(dst_mm, addr, dst_pte, pte); + return 0; } static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, @@ -524,7 +573,9 @@ progress++; continue; } - copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss); + if (copy_one_pte(dst_mm, src_mm, dst_pte, + src_pte, vma, addr, rss)) + return -ENOMEM; progress += 8; } while (dst_pte++, src_pte++, addr += 
PAGE_SIZE, addr != end); @@ -1170,7 +1221,7 @@ if (err) break; } while (pud++, addr = next, addr != end); - return err; + return 0; } int zeromap_page_range(struct vm_area_struct *vma, @@ -1486,34 +1537,6 @@ return pte; } -static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma) -{ - /* - * If the source page was a PFN mapping, we don't have - * a "struct page" for it. We do a best-effort copy by - * just copying from the original user address. If that - * fails, we just zero-fill it. Live with it. - */ - if (unlikely(!src)) { - void *kaddr = kmap_atomic(dst, KM_USER0); - void __user *uaddr = (void __user *)(va & PAGE_MASK); - - /* - * This really shouldn't fail, because the page is there - * in the page tables. But it might just be unreadable, - * in which case we just give up and fill the result with - * zeroes. - */ - if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) - memset(kaddr, 0, PAGE_SIZE); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(dst); - return; - - } - copy_user_highpage(dst, src, va, vma); -} - /* * This routine handles present pages, when users try to write * to a shared page. 
It is done by copying the page to a new address @@ -2736,3 +2759,117 @@ return buf - old_buf; } + +#ifdef CONFIG_IPIPE + +static inline int ipipe_pin_pte_range(struct mm_struct *mm, pmd_t *pmd, + struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + spinlock_t *ptl; + pte_t *pte; + + do { + pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + if (!pte) + continue; + + if (!pte_present(*pte)) { + pte_unmap_unlock(pte, ptl); + continue; + } + + if (do_wp_page(mm, vma, addr, pte, pmd, ptl, *pte) == VM_FAULT_OOM) + return -ENOMEM; + } while (addr += PAGE_SIZE, addr != end); + return 0; +} + +static inline int ipipe_pin_pmd_range(struct mm_struct *mm, pud_t *pud, + struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pmd_t *pmd; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (ipipe_pin_pte_range(mm, pmd, vma, addr, end)) + return -ENOMEM; + } while (pmd++, addr = next, addr != end); + return 0; +} + +static inline int ipipe_pin_pud_range(struct mm_struct *mm, pgd_t *pgd, + struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pud_t *pud; + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (ipipe_pin_pmd_range(mm, pud, vma, addr, end)) + return -ENOMEM; + } while (pud++, addr = next, addr != end); + return 0; +} + +int ipipe_disable_ondemand_mappings(struct task_struct *tsk) +{ + unsigned long addr, next, end; + struct vm_area_struct *vma; + struct mm_struct *mm; + int result = 0; + pgd_t *pgd; + + mm = get_task_mm(tsk); + if (!mm) + return -EPERM; + + down_write(&mm->mmap_sem); + if (mm->def_flags & VM_PINNED) + goto done_mm; + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (!is_cow_mapping(vma->vm_flags)) + continue; + + addr = vma->vm_start; + end = vma->vm_end; + + pgd = pgd_offset(mm, addr); + do { + next = pgd_addr_end(addr, end); + if (ipipe_pin_pud_range(mm, pgd, vma, addr, next)) { + result = 
-ENOMEM; + goto done_mm; + } + } while (pgd++, addr = next, addr != end); + } + mm->def_flags |= VM_PINNED; + + done_mm: + up_write(&mm->mmap_sem); + mmput(mm); + return result; +} + +EXPORT_SYMBOL(ipipe_disable_ondemand_mappings); + +void __ipipe_pin_range_globally(unsigned long start, unsigned long end) +{ + struct task_struct *p; + + read_lock(&tasklist_lock); + + for_each_process(p) + if (p->mm) + __ipipe_pin_range_mapping(p->mm, start, end); + + read_unlock(&tasklist_lock); +} + +#endif diff -Naur linux-2.6.21.5.orig/mm/mlock.c linux-2.6.21.5/mm/mlock.c --- linux-2.6.21.5.orig/mm/mlock.c 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/mm/mlock.c 2007-08-07 10:56:22.000000000 +0200 @@ -162,10 +162,10 @@ static int do_mlockall(int flags) { struct vm_area_struct * vma, * prev = NULL; - unsigned int def_flags = 0; + unsigned int def_flags = current->mm->def_flags & VM_PINNED; if (flags & MCL_FUTURE) - def_flags = VM_LOCKED; + def_flags |= VM_LOCKED; current->mm->def_flags = def_flags; if (flags == MCL_FUTURE) goto out; diff -Naur linux-2.6.21.5.orig/mm/vmalloc.c linux-2.6.21.5/mm/vmalloc.c --- linux-2.6.21.5.orig/mm/vmalloc.c 2007-08-07 10:55:39.000000000 +0200 +++ linux-2.6.21.5/mm/vmalloc.c 2007-08-07 10:56:22.000000000 +0200 @@ -156,6 +156,7 @@ if (err) break; } while (pgd++, addr = next, addr != end); + __ipipe_pin_range_globally((unsigned long) area->addr, end); flush_cache_vmap((unsigned long) area->addr, end); return err; } --------------000602050800000000030502--