From mboxrd@z Thu Jan 1 00:00:00 1970 Message-ID: <47108B7C.4080906@domain.hid> Date: Sat, 13 Oct 2007 11:10:20 +0200 From: Jan Kiszka MIME-Version: 1.0 Content-Type: multipart/signed; micalg=pgp-sha1; protocol="application/pgp-signature"; boundary="------------enig05F65E2EA842680795110C65" Sender: jan.kiszka@domain.hid Subject: [Xenomai-core] [PREVIEW] ipipe-2.6.23-i386-1.10-07 List-Id: "Xenomai life and development \(bug reports, patches, discussions\)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: adeos-main@gna.org Cc: Xenomai-core@domain.hid This is an OpenPGP/MIME signed message (RFC 2440 and 3156) --------------enig05F65E2EA842680795110C65 Content-Type: multipart/mixed; boundary="------------050602070300060801070403" This is a multi-part message in MIME format. --------------050602070300060801070403 Content-Type: text/plain; charset=ISO-8859-15 Content-Transfer-Encoding: quoted-printable This is a forward port of the latest ipipe patch for 2.6.22 to 2.6.23, additionally based on Philippe's earlier work for -rc2. It runs surprisingly well here, so I would like to invite more testers to the party. You need Xenomai SVN head + my timer setup fix [1] to create a test platform. I had to perform one small magic dance to get the patch compiling due to inclusion hell around ipipe_base.h, check arch/i386/boot/compressed/Makefile. 
Jan [1] https://mail.gna.org/public/xenomai-core/2007-10/msg00068.html --------------050602070300060801070403 Content-Type: text/x-patch; name="adeos-ipipe-2.6.23-i386-1.10-07.patch" Content-Transfer-Encoding: quoted-printable Content-Disposition: inline; filename="adeos-ipipe-2.6.23-i386-1.10-07.patch" Index: linux-2.6.23/Makefile =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/Makefile +++ linux-2.6.23/Makefile @@ -491,6 +491,10 @@ endif =20 include $(srctree)/arch/$(ARCH)/Makefile =20 +ifdef CONFIG_IPIPE_TRACE_MCOUNT +CFLAGS +=3D -pg +endif + ifdef CONFIG_FRAME_POINTER CFLAGS +=3D -fno-omit-frame-pointer -fno-optimize-sibling-calls else Index: linux-2.6.23/arch/i386/Kconfig =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/Kconfig +++ linux-2.6.23/arch/i386/Kconfig @@ -217,7 +217,7 @@ endchoice config PARAVIRT bool "Paravirtualization support (EXPERIMENTAL)" depends on EXPERIMENTAL - depends on !(X86_VISWS || X86_VOYAGER) + depends on !(X86_VISWS || X86_VOYAGER || IPIPE) help Paravirtualization is a way of running multiple instances of Linux on the same machine, under a hypervisor. 
This option @@ -315,6 +315,8 @@ config SCHED_MC =20 source "kernel/Kconfig.preempt" =20 +source "kernel/ipipe/Kconfig" + config X86_UP_APIC bool "Local APIC support on uniprocessors" depends on !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH) Index: linux-2.6.23/arch/i386/kernel/Makefile =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/kernel/Makefile +++ linux-2.6.23/arch/i386/kernel/Makefile @@ -32,6 +32,8 @@ obj-$(CONFIG_X86_SUMMIT_NUMA) +=3D summit. obj-$(CONFIG_KPROBES) +=3D kprobes.o obj-$(CONFIG_MODULES) +=3D module.o obj-y +=3D sysenter.o vsyscall.o +obj-$(CONFIG_IPIPE) +=3D ipipe.o +obj-$(CONFIG_IPIPE_TRACE_MCOUNT) +=3D mcount.o obj-$(CONFIG_ACPI_SRAT) +=3D srat.o obj-$(CONFIG_EFI) +=3D efi.o efi_stub.o obj-$(CONFIG_DOUBLEFAULT) +=3D doublefault.o Index: linux-2.6.23/arch/i386/kernel/apic.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/kernel/apic.c +++ linux-2.6.23/arch/i386/kernel/apic.c @@ -250,7 +250,7 @@ static void lapic_timer_setup(enum clock if (!local_apic_timer_verify_ok) return; =20 - local_irq_save(flags); + local_irq_save_hw(flags); =20 switch (mode) { case CLOCK_EVT_MODE_PERIODIC: @@ -269,7 +269,7 @@ static void lapic_timer_setup(enum clock break; } =20 - local_irq_restore(flags); + local_irq_restore_hw(flags); } =20 /* @@ -716,13 +716,18 @@ void lapic_shutdown(void) if (!cpu_has_apic) return; =20 - local_irq_save(flags); + local_irq_save_hw(flags); clear_local_APIC(); =20 if (enabled_via_apicbase) disable_local_APIC(); =20 - local_irq_restore(flags); + local_irq_restore_hw(flags); +} + +int __ipipe_check_lapic(void) +{ + return !(lapic_clockevent.features & 
CLOCK_EVT_FEAT_DUMMY); } =20 /* @@ -1272,7 +1277,7 @@ void smp_spurious_interrupt(struct pt_re */ v =3D apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) - ack_APIC_irq(); + __ack_APIC_irq(); =20 /* see sw-dev-man vol 3, chapter 7.4.13.5 */ printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " @@ -1329,6 +1334,12 @@ void __init apic_intr_init(void) #ifdef CONFIG_X86_MCE_P4THERMAL set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); #endif +#ifdef CONFIG_IPIPE + set_intr_gate(IPIPE_SERVICE_VECTOR0, ipipe_ipi0); + set_intr_gate(IPIPE_SERVICE_VECTOR1, ipipe_ipi1); + set_intr_gate(IPIPE_SERVICE_VECTOR2, ipipe_ipi2); + set_intr_gate(IPIPE_SERVICE_VECTOR3, ipipe_ipi3); +#endif } =20 /** @@ -1467,9 +1478,9 @@ static int lapic_suspend(struct sys_devi apic_pm_state.apic_thmr =3D apic_read(APIC_LVTTHMR); #endif =20 - local_irq_save(flags); + local_irq_save_hw(flags); disable_local_APIC(); - local_irq_restore(flags); + local_irq_restore_hw(flags); return 0; } =20 @@ -1484,7 +1495,7 @@ static int lapic_resume(struct sys_devic =20 maxlvt =3D lapic_get_maxlvt(); =20 - local_irq_save(flags); + local_irq_save_hw(flags); =20 /* * Make sure the APICBASE points to the right address @@ -1519,7 +1530,7 @@ static int lapic_resume(struct sys_devic apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - local_irq_restore(flags); + local_irq_restore_hw(flags); return 0; } =20 Index: linux-2.6.23/arch/i386/kernel/entry.S =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/kernel/entry.S +++ linux-2.6.23/arch/i386/kernel/entry.S @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -75,6 +76,58 @@ DF_MASK =3D 0x00000400=20 NT_MASK =3D 0x00004000 VM_MASK =3D 0x00020000 =20 
+#ifdef CONFIG_IPIPE +#define EMULATE_ROOT_IRET(bypass) \ + call __ipipe_unstall_iret_root ; \ + TRACE_IRQS_ON ; \ + bypass: \ + movl PT_EAX(%esp),%eax +#define TEST_PREEMPTIBLE(regs) call __ipipe_kpreempt_root ; testl %eax,= %eax +#define CATCH_ROOT_SYSCALL(bypass1,bypass2) \ + call __ipipe_syscall_root ; \ + testl %eax,%eax ; \ + js bypass1 ; \ + jne bypass2 ; \ + movl PT_ORIG_EAX(%esp),%eax +#define PUSH_XCODE(v) pushl $ ex_ ## v +#define PUSH_XVEC(v) pushl $ ex_ ## v +#define HANDLE_EXCEPTION(code) movl %code,%ecx ; \ + call __ipipe_handle_exception ; \ + testl %eax,%eax ; \ + jnz restore_nocheck_notrace +#define DIVERT_EXCEPTION(code) movl $(__USER_DS), %ecx ; \ + movl %ecx, %ds ; \ + movl %ecx, %es ; \ + movl %esp, %eax ; \ + movl $ex_ ## code,%edx ; \ + call __ipipe_divert_exception ; \ + testl %eax,%eax ; \ + jnz restore_nocheck_notrace + +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF +# define IPIPE_TRACE_IRQ_ENTER \ + lea PT_EIP-4(%esp), %ebp; \ + movl PT_ORIG_EAX(%esp), %eax; \ + call ipipe_trace_begin +# define IPIPE_TRACE_IRQ_EXIT \ + pushl %eax; \ + movl PT_ORIG_EAX+4(%esp), %eax; \ + call ipipe_trace_end; \ + popl %eax +#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */ +#define IPIPE_TRACE_IRQ_ENTER +#define IPIPE_TRACE_IRQ_EXIT +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ +#else /* !CONFIG_IPIPE */ +#define EMULATE_ROOT_IRET(bypass) +#define TEST_PREEMPTIBLE(regs) testl $IF_MASK,PT_EFLAGS(regs) +#define CATCH_ROOT_SYSCALL(bypass1,bypass2) +#define PUSH_XCODE(v) pushl $v +#define PUSH_XVEC(v) pushl v +#define HANDLE_EXCEPTION(code) call *%code +#define DIVERT_EXCEPTION(code) +#endif /* CONFIG_IPIPE */ + #ifdef CONFIG_PREEMPT #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_= OFF #else @@ -214,6 +267,7 @@ VM_MASK =3D 0x00020000 CFI_OFFSET ebx, PT_EBX-PT_OLDESP =20 ENTRY(ret_from_fork) + ENABLE_INTERRUPTS_HW_COND CFI_STARTPROC pushl %eax CFI_ADJUST_CFA_OFFSET 4 @@ -241,7 +295,7 @@ END(ret_from_fork) RING0_PTREGS_FRAME ret_from_exception: 
preempt_stop(CLBR_ANY) -ret_from_intr: +ENTRY(ret_from_intr) GET_THREAD_INFO(%ebp) check_userspace: movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS @@ -263,14 +317,14 @@ END(ret_from_exception) =20 #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) - DISABLE_INTERRUPTS(CLBR_ANY) + DISABLE_INTERRUPTS_HW(CLBR_ANY) cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_nocheck need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl jz restore_all - testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ? + TEST_PREEMPTIBLE(%esp) # interrupts off (exception path) ? jz restore_all call preempt_schedule_irq jmp need_resched @@ -293,7 +347,7 @@ sysenter_past_esp: * No need to follow this irqs on/off section: the syscall * disabled irqs and here we enable it straight after entry: */ - ENABLE_INTERRUPTS(CLBR_NONE) + ENABLE_INTERRUPTS_HW(CLBR_NONE) pushl $(__USER_DS) CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET ss, 0*/ @@ -330,6 +384,7 @@ sysenter_past_esp: CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL GET_THREAD_INFO(%ebp) + CATCH_ROOT_SYSCALL(sysenter_tail,sysenter_exit) =20 /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not te= stb */ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_A= UDIT),TI_flags(%ebp) @@ -338,16 +393,20 @@ sysenter_past_esp: jae syscall_badsys call *sys_call_table(,%eax,4) movl %eax,PT_EAX(%esp) +sysenter_tail: DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work /* if something modifies registers it must also disable sysexit */ + EMULATE_ROOT_IRET(sysenter_exit) movl PT_EIP(%esp), %edx movl PT_OLDESP(%esp), %ecx xorl %ebp,%ebp - TRACE_IRQS_ON +#ifndef CONFIG_IPIPE + TRACE_IRQS_ON +#endif 1: mov PT_FS(%esp), %fs ENABLE_INTERRUPTS_SYSEXIT CFI_ENDPROC @@ -367,6 +426,7 @@ ENTRY(system_call) CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL GET_THREAD_INFO(%ebp) + CATCH_ROOT_SYSCALL(syscall_exit,restore_nocheck_notrace) # system 
call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not te= stb */ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_A= UDIT),TI_flags(%ebp) @@ -401,7 +461,11 @@ restore_all: CFI_REMEMBER_STATE je ldt_ss # returning to user-space with LDT SS restore_nocheck: +#ifdef CONFIG_IPIPE + call __ipipe_unstall_iret_root +#else /* !CONFIG_IPIPE */ TRACE_IRQS_IRET +#endif /* CONFIG_IPIPE */ restore_nocheck_notrace: RESTORE_REGS addl $4, %esp # skip orig_eax/error_code @@ -410,7 +474,7 @@ restore_nocheck_notrace: .section .fixup,"ax" iret_exc: pushl $0 # no error code - pushl $do_iret_error + PUSH_XCODE(do_iret_error) jmp error_code .previous .section __ex_table,"a" @@ -451,7 +515,7 @@ ldt_ss: CFI_ADJUST_CFA_OFFSET 4 pushl %eax CFI_ADJUST_CFA_OFFSET 4 - DISABLE_INTERRUPTS(CLBR_EAX) + DISABLE_INTERRUPTS_HW(CLBR_EAX) TRACE_IRQS_OFF lss (%esp), %esp CFI_ADJUST_CFA_OFFSET -8 @@ -466,6 +530,7 @@ work_pending: testb $_TIF_NEED_RESCHED, %cl jz work_notifysig work_resched: + ENABLE_INTERRUPTS_HW_COND call schedule DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending @@ -605,6 +670,47 @@ END(irq_entries_start) END(interrupt) .previous =20 +#ifdef CONFIG_IPIPE + ALIGN +common_interrupt: + SAVE_ALL + IPIPE_TRACE_IRQ_ENTER + call __ipipe_handle_irq + IPIPE_TRACE_IRQ_EXIT + testl %eax,%eax + jnz ret_from_intr + RESTORE_REGS + addl $4, %esp + iret + CFI_ENDPROC + +#define BUILD_INTERRUPT(name, nr) \ +ENTRY(name) \ + RING0_INT_FRAME; \ + pushl $~(nr-FIRST_SYSTEM_VECTOR+NR_IRQS);\ + CFI_ADJUST_CFA_OFFSET 4; \ + SAVE_ALL; \ + IPIPE_TRACE_IRQ_ENTER; \ + call __ipipe_handle_irq; \ + IPIPE_TRACE_IRQ_EXIT; \ + testl %eax,%eax; \ + jnz ret_from_intr; \ + RESTORE_REGS; \ + addl $4, %esp; \ + iret; \ + CFI_ENDPROC + +#ifdef CONFIG_X86_LOCAL_APIC + BUILD_INTERRUPT(ipipe_ipi0,IPIPE_SERVICE_VECTOR0) + BUILD_INTERRUPT(ipipe_ipi1,IPIPE_SERVICE_VECTOR1) + 
BUILD_INTERRUPT(ipipe_ipi2,IPIPE_SERVICE_VECTOR2) + BUILD_INTERRUPT(ipipe_ipi3,IPIPE_SERVICE_VECTOR3) +#ifdef CONFIG_SMP + BUILD_INTERRUPT(ipipe_ipiX,IPIPE_CRITICAL_VECTOR) +#endif +#endif + +#else /* !CONFIG_IPIPE */ /* * the CPU automatically disables interrupts when executing an IRQ vecto= r, * so IRQ-flags tracing has to follow that: @@ -632,12 +738,14 @@ ENTRY(name) \ CFI_ENDPROC; \ ENDPROC(name) =20 +#endif /* !CONFIG_IPIPE */ + /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" =20 KPROBE_ENTRY(page_fault) RING0_EC_FRAME - pushl $do_page_fault + PUSH_XCODE(do_page_fault) CFI_ADJUST_CFA_OFFSET 4 ALIGN error_code: @@ -688,7 +796,7 @@ error_code: movl %ecx, %ds movl %ecx, %es movl %esp,%eax # pt_regs pointer - call *%edi + HANDLE_EXCEPTION(edi) jmp ret_from_exception CFI_ENDPROC KPROBE_END(page_fault) @@ -697,7 +805,7 @@ ENTRY(coprocessor_error) RING0_INT_FRAME pushl $0 CFI_ADJUST_CFA_OFFSET 4 - pushl $do_coprocessor_error + PUSH_XCODE(do_coprocessor_error) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC @@ -707,7 +815,7 @@ ENTRY(simd_coprocessor_error) RING0_INT_FRAME pushl $0 CFI_ADJUST_CFA_OFFSET 4 - pushl $do_simd_coprocessor_error + PUSH_XCODE(do_simd_coprocessor_error) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC @@ -718,6 +826,7 @@ ENTRY(device_not_available) pushl $-1 # mark this as an int CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL + DIVERT_EXCEPTION(device_not_available) GET_CR0_INTO_EAX testl $0x4, %eax # EM (math emulation bit) jne device_not_available_emulate @@ -771,6 +880,7 @@ debug_stack_correct: pushl $-1 # mark this as an int CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL + DIVERT_EXCEPTION(do_debug) xorl %edx,%edx # error code 0 movl %esp,%eax # pt_regs pointer call do_debug @@ -889,6 +999,7 @@ KPROBE_ENTRY(int3) pushl $-1 # mark this as an int CFI_ADJUST_CFA_OFFSET 4 SAVE_ALL + DIVERT_EXCEPTION(do_int3) xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_int3 @@ -900,7 +1011,7 @@ ENTRY(overflow) 
RING0_INT_FRAME pushl $0 CFI_ADJUST_CFA_OFFSET 4 - pushl $do_overflow + PUSH_XCODE(do_overflow) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC @@ -910,7 +1021,7 @@ ENTRY(bounds) RING0_INT_FRAME pushl $0 CFI_ADJUST_CFA_OFFSET 4 - pushl $do_bounds + PUSH_XCODE(do_bounds) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC @@ -920,7 +1031,7 @@ ENTRY(invalid_op) RING0_INT_FRAME pushl $0 CFI_ADJUST_CFA_OFFSET 4 - pushl $do_invalid_op + PUSH_XCODE(do_invalid_op) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC @@ -930,7 +1041,7 @@ ENTRY(coprocessor_segment_overrun) RING0_INT_FRAME pushl $0 CFI_ADJUST_CFA_OFFSET 4 - pushl $do_coprocessor_segment_overrun + PUSH_XCODE(do_coprocessor_segment_overrun) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC @@ -938,7 +1049,7 @@ END(coprocessor_segment_overrun) =20 ENTRY(invalid_TSS) RING0_EC_FRAME - pushl $do_invalid_TSS + PUSH_XCODE(do_invalid_TSS) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC @@ -946,7 +1057,7 @@ END(invalid_TSS) =20 ENTRY(segment_not_present) RING0_EC_FRAME - pushl $do_segment_not_present + PUSH_XCODE(do_segment_not_present) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC @@ -954,7 +1065,7 @@ END(segment_not_present) =20 ENTRY(stack_segment) RING0_EC_FRAME - pushl $do_stack_segment + PUSH_XCODE(do_stack_segment) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC @@ -962,7 +1073,7 @@ END(stack_segment) =20 KPROBE_ENTRY(general_protection) RING0_EC_FRAME - pushl $do_general_protection + PUSH_XCODE(do_general_protection) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC @@ -970,7 +1081,7 @@ KPROBE_END(general_protection) =20 ENTRY(alignment_check) RING0_EC_FRAME - pushl $do_alignment_check + PUSH_XCODE(do_alignment_check) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC @@ -980,7 +1091,7 @@ ENTRY(divide_error) RING0_INT_FRAME pushl $0 # no error code CFI_ADJUST_CFA_OFFSET 4 - pushl $do_divide_error + PUSH_XCODE(do_divide_error) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC @@ -991,7 +1102,7 @@ 
ENTRY(machine_check) RING0_INT_FRAME pushl $0 CFI_ADJUST_CFA_OFFSET 4 - pushl machine_check_vector + PUSH_XVEC(machine_check_vector) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC @@ -1002,7 +1113,7 @@ ENTRY(spurious_interrupt_bug) RING0_INT_FRAME pushl $0 CFI_ADJUST_CFA_OFFSET 4 - pushl $do_spurious_interrupt_bug + PUSH_XCODE(do_spurious_interrupt_bug) CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC Index: linux-2.6.23/arch/i386/kernel/i8253.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/kernel/i8253.c +++ linux-2.6.23/arch/i386/kernel/i8253.c @@ -4,6 +4,7 @@ */ #include #include +#include #include #include #include @@ -15,7 +16,7 @@ #include #include =20 -DEFINE_SPINLOCK(i8253_lock); +IPIPE_DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); =20 /* @@ -133,6 +134,12 @@ static cycle_t pit_read(void) static int old_count; static u32 old_jifs; =20 +#ifdef CONFIG_IPIPE + if (!__ipipe_pipeline_head_p(ipipe_root_domain)) + /* We don't really own the PIT. 
*/ + return (cycle_t)(jiffies * LATCH) + (LATCH - 1) - old_count; +#endif /* CONFIG_IPIPE */ + spin_lock_irqsave(&i8253_lock, flags); /* * Although our caller may have the read side of xtime_lock, Index: linux-2.6.23/arch/i386/kernel/i8259.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/kernel/i8259.c +++ linux-2.6.23/arch/i386/kernel/i8259.c @@ -34,7 +34,7 @@ */ =20 static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); +IPIPE_DEFINE_SPINLOCK(i8259A_lock); static void mask_and_ack_8259A(unsigned int); =20 static struct irq_chip i8259A_chip =3D { @@ -71,6 +71,7 @@ void disable_8259A_irq(unsigned int irq) unsigned long flags; =20 spin_lock_irqsave(&i8259A_lock, flags); + ipipe_irq_lock(irq); cached_irq_mask |=3D mask; if (irq & 8) outb(cached_slave_mask, PIC_SLAVE_IMR); @@ -81,15 +82,18 @@ void disable_8259A_irq(unsigned int irq) =20 void enable_8259A_irq(unsigned int irq) { - unsigned int mask =3D ~(1 << irq); + unsigned int mask =3D (1 << irq); unsigned long flags; =20 spin_lock_irqsave(&i8259A_lock, flags); - cached_irq_mask &=3D mask; - if (irq & 8) - outb(cached_slave_mask, PIC_SLAVE_IMR); - else - outb(cached_master_mask, PIC_MASTER_IMR); + if (cached_irq_mask & mask) { + cached_irq_mask &=3D ~mask; + if (irq & 8) + outb(cached_slave_mask, PIC_SLAVE_IMR); + else + outb(cached_master_mask, PIC_MASTER_IMR); + ipipe_irq_unlock(irq); + } spin_unlock_irqrestore(&i8259A_lock, flags); } =20 @@ -170,6 +174,15 @@ static void mask_and_ack_8259A(unsigned=20 */ if (cached_irq_mask & irqmask) goto spurious_8259A_irq; +#ifdef CONFIG_IPIPE + if (irq =3D=3D 0) { + /* Fast timer ack -- don't mask (unless supposedly + spurious) */ + outb(0x60, PIC_MASTER_CMD); /* Specific EOI to master. 
*/ + spin_unlock_irqrestore(&i8259A_lock, flags); + return; + } +#endif /* CONFIG_IPIPE */ cached_irq_mask |=3D irqmask; =20 handle_real_irq: Index: linux-2.6.23/arch/i386/kernel/io_apic.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/kernel/io_apic.c +++ linux-2.6.23/arch/i386/kernel/io_apic.c @@ -56,8 +56,8 @@ atomic_t irq_mis_count; /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 =3D { -1, -1 }; =20 -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); +static IPIPE_DEFINE_SPINLOCK(ioapic_lock); +static IPIPE_DEFINE_SPINLOCK(vector_lock); =20 int timer_over_8254 __initdata =3D 1; =20 @@ -278,6 +278,7 @@ static void mask_IO_APIC_irq (unsigned i unsigned long flags; =20 spin_lock_irqsave(&ioapic_lock, flags); + ipipe_irq_lock(irq); __mask_IO_APIC_irq(irq); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -288,6 +289,7 @@ static void unmask_IO_APIC_irq (unsigned =20 spin_lock_irqsave(&ioapic_lock, flags); __unmask_IO_APIC_irq(irq); + ipipe_irq_unlock(irq); spin_unlock_irqrestore(&ioapic_lock, flags); } =20 @@ -729,8 +731,10 @@ late_initcall(balanced_irq_init); #ifndef CONFIG_SMP void fastcall send_IPI_self(int vector) { + unsigned long flags; unsigned int cfg; =20 + local_irq_save_hw_cond(flags); /* * Wait for idle. */ @@ -740,6 +744,7 @@ void fastcall send_IPI_self(int vector) * Send the IPI. The write to APIC_ICR fires this off. 
*/ apic_write_around(APIC_ICR, cfg); + local_irq_restore_hw_cond(flags); } #endif /* !CONFIG_SMP */ =20 @@ -1939,6 +1944,7 @@ static unsigned int startup_ioapic_irq(u was_pending =3D 1; } __unmask_IO_APIC_irq(irq); + ipipe_irq_unlock(irq); spin_unlock_irqrestore(&ioapic_lock, flags); =20 return was_pending; @@ -1946,8 +1952,10 @@ static unsigned int startup_ioapic_irq(u =20 static void ack_ioapic_irq(unsigned int irq) { +#ifndef CONFIG_IPIPE move_native_irq(irq); - ack_APIC_irq(); +#endif /* CONFIG_IPIPE */ + __ack_APIC_irq(); } =20 static void ack_ioapic_quirk_irq(unsigned int irq) @@ -1955,7 +1963,9 @@ static void ack_ioapic_quirk_irq(unsigne unsigned long v; int i; =20 +#ifndef CONFIG_IPIPE move_native_irq(irq); +#endif /* CONFIG_IPIPE */ /* * It appears there is an erratum which affects at least version 0x11 * of I/O APIC (that's the 82093AA and cores integrated into various @@ -1979,7 +1989,7 @@ static void ack_ioapic_quirk_irq(unsigne =20 v =3D apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); =20 - ack_APIC_irq(); + __ack_APIC_irq(); =20 if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); @@ -1988,6 +1998,17 @@ static void ack_ioapic_quirk_irq(unsigne __unmask_and_level_IO_APIC_irq(irq); spin_unlock(&ioapic_lock); } + +#ifdef CONFIG_IPIPE +/* + * Prevent low priority IRQs grabbed by high priority domains from + * being delayed, waiting for a high priority interrupt handler + * running in a low priority domain to complete. 
+ */ + spin_lock(&ioapic_lock); + __mask_IO_APIC_irq(irq); + spin_unlock(&ioapic_lock); +#endif } =20 static int ioapic_retrigger_irq(unsigned int irq) @@ -2049,23 +2070,29 @@ static inline void init_IO_APIC_traps(vo =20 static void ack_apic(unsigned int irq) { - ack_APIC_irq(); + __ack_APIC_irq(); } =20 static void mask_lapic_irq (unsigned int irq) { - unsigned long v; + unsigned long v, flags; =20 + spin_lock_irqsave(&ioapic_lock, flags); + ipipe_irq_lock(irq); v =3D apic_read(APIC_LVT0); apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); + spin_unlock_irqrestore(&ioapic_lock, flags); } =20 static void unmask_lapic_irq (unsigned int irq) { - unsigned long v; + unsigned long v, flags; =20 + spin_lock_irqsave(&ioapic_lock, flags); v =3D apic_read(APIC_LVT0); apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); + ipipe_irq_unlock(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); } =20 static struct irq_chip lapic_chip __read_mostly =3D { Index: linux-2.6.23/arch/i386/kernel/ipipe.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /dev/null +++ linux-2.6.23/arch/i386/kernel/ipipe.c @@ -0,0 +1,817 @@ +/* -*- linux-c -*- + * linux/arch/i386/kernel/ipipe.c + * + * Copyright (C) 2002-2007 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modif= y + * it under the terms of the GNU General Public License as published b= y + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 0213= 9, + * USA; either version 2 of the License, or (at your option) any later= + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-130= 7, USA. + * + * Architecture-dependent I-PIPE support for x86. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_X86_LOCAL_APIC +#include +#include +#include +#include +#ifdef CONFIG_X86_IO_APIC +#include +#endif /* CONFIG_X86_IO_APIC */ +#include +#include +#endif /* CONFIG_X86_LOCAL_APIC */ + +extern struct clock_event_device *global_clock_event; + +extern struct clock_event_device pit_clockevent; + +int __ipipe_tick_irq; + +#ifdef CONFIG_SMP + +static cpumask_t __ipipe_cpu_sync_map; + +static cpumask_t __ipipe_cpu_lock_map; + +static IPIPE_DEFINE_SPINLOCK(__ipipe_cpu_barrier); + +static atomic_t __ipipe_critical_count =3D ATOMIC_INIT(0); + +static void (*__ipipe_cpu_sync) (void); + +#endif /* CONFIG_SMP */ + +/* ipipe_trigger_irq() -- Push the interrupt at front of the pipeline + just like if it has been actually received from a hw source. Also + works for virtual interrupts. 
*/ + +int fastcall ipipe_trigger_irq(unsigned irq) +{ + struct pt_regs regs; + unsigned long flags; + + if (irq >=3D IPIPE_NR_IRQS || + (ipipe_virtual_irq_p(irq) && + !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))) + return -EINVAL; + + local_irq_save_hw(flags); + + regs.orig_eax =3D irq; /* Won't be acked */ + regs.xcs =3D __KERNEL_CS; + regs.eflags =3D flags; + + __ipipe_handle_irq(regs); + + local_irq_restore_hw(flags); + + return 1; +} + +int ipipe_get_sysinfo(struct ipipe_sysinfo *info) +{ + info->ncpus =3D num_online_cpus(); + info->cpufreq =3D ipipe_cpu_freq(); + info->archdep.tmirq =3D __ipipe_tick_irq; +#ifdef CONFIG_X86_TSC + info->archdep.tmfreq =3D ipipe_cpu_freq(); +#else /* !CONFIG_X86_TSC */ + info->archdep.tmfreq =3D CLOCK_TICK_RATE; +#endif /* CONFIG_X86_TSC */ + + return 0; +} + +fastcall unsigned int do_IRQ(struct pt_regs *regs); +fastcall void smp_apic_timer_interrupt(struct pt_regs *regs); +fastcall void smp_spurious_interrupt(struct pt_regs *regs); +fastcall void smp_error_interrupt(struct pt_regs *regs); +fastcall void smp_thermal_interrupt(struct pt_regs *regs); +fastcall void smp_reschedule_interrupt(struct pt_regs *regs); +fastcall void smp_invalidate_interrupt(struct pt_regs *regs); +fastcall void smp_call_function_interrupt(struct pt_regs *regs); + +static int __ipipe_ack_irq(unsigned irq) +{ + irq_desc_t *desc =3D irq_desc + irq; + desc->ipipe_ack(irq, desc); + return 1; +} + +void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq) +{ + irq_desc[irq].status &=3D ~IRQ_DISABLED; +} + +#ifdef CONFIG_X86_LOCAL_APIC + +static int __ipipe_noack_apic(unsigned irq) +{ + return 1; +} + +int __ipipe_ack_apic(unsigned irq) +{ + __ack_APIC_irq(); + return 1; +} + +static void __ipipe_null_handler(unsigned irq, void *cookie) +{ +} + +#endif /* CONFIG_X86_LOCAL_APIC */ + +/* __ipipe_enable_pipeline() -- We are running on the boot CPU, hw + interrupts are off, and secondary CPUs are still lost in space. 
*/ + +void __init __ipipe_enable_pipeline(void) +{ + unsigned irq; + +#ifdef CONFIG_X86_LOCAL_APIC + + /* Map the APIC system vectors. */ + + ipipe_virtualize_irq(ipipe_root_domain, + ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR), + (ipipe_irq_handler_t)&smp_apic_timer_interrupt, + NULL, + &__ipipe_ack_apic, + IPIPE_STDROOT_MASK); + + ipipe_virtualize_irq(ipipe_root_domain, + ipipe_apic_vector_irq(SPURIOUS_APIC_VECTOR), + (ipipe_irq_handler_t)&smp_spurious_interrupt, + NULL, + &__ipipe_noack_apic, + IPIPE_STDROOT_MASK); + + ipipe_virtualize_irq(ipipe_root_domain, + ipipe_apic_vector_irq(ERROR_APIC_VECTOR), + (ipipe_irq_handler_t)&smp_error_interrupt, + NULL, + &__ipipe_ack_apic, + IPIPE_STDROOT_MASK); + + ipipe_virtualize_irq(ipipe_root_domain, + ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0), + &__ipipe_null_handler, + NULL, + &__ipipe_ack_apic, + IPIPE_STDROOT_MASK); + + ipipe_virtualize_irq(ipipe_root_domain, + ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1), + &__ipipe_null_handler, + NULL, + &__ipipe_ack_apic, + IPIPE_STDROOT_MASK); + + ipipe_virtualize_irq(ipipe_root_domain, + ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2), + &__ipipe_null_handler, + NULL, + &__ipipe_ack_apic, + IPIPE_STDROOT_MASK); + + ipipe_virtualize_irq(ipipe_root_domain, + ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3), + &__ipipe_null_handler, + NULL, + &__ipipe_ack_apic, + IPIPE_STDROOT_MASK); + +#ifdef CONFIG_X86_MCE_P4THERMAL + ipipe_virtualize_irq(ipipe_root_domain, + ipipe_apic_vector_irq(THERMAL_APIC_VECTOR), + (ipipe_irq_handler_t)&smp_thermal_interrupt, + NULL, + &__ipipe_ack_apic, + IPIPE_STDROOT_MASK); +#endif /* CONFIG_X86_MCE_P4THERMAL */ + + __ipipe_tick_irq =3D global_clock_event =3D=3D &pit_clockevent ? 
0 + : ipipe_apic_vector_irq(LOCAL_TIMER_VECTOR); + +#else /* !CONFIG_X86_LOCAL_APIC */ + + __ipipe_tick_irq =3D 0; + +#endif /* CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_SMP + ipipe_virtualize_irq(ipipe_root_domain, + ipipe_apic_vector_irq(RESCHEDULE_VECTOR), + (ipipe_irq_handler_t)&smp_reschedule_interrupt, + NULL, + &__ipipe_ack_apic, + IPIPE_STDROOT_MASK); + + ipipe_virtualize_irq(ipipe_root_domain, + ipipe_apic_vector_irq(INVALIDATE_TLB_VECTOR), + (ipipe_irq_handler_t)&smp_invalidate_interrupt, + NULL, + &__ipipe_ack_apic, + IPIPE_STDROOT_MASK); + + ipipe_virtualize_irq(ipipe_root_domain, + ipipe_apic_vector_irq(CALL_FUNCTION_VECTOR), + (ipipe_irq_handler_t)&smp_call_function_interrupt, + NULL, + &__ipipe_ack_apic, + IPIPE_STDROOT_MASK); + +#endif /* CONFIG_SMP */ + + /* Finally, virtualize the remaining ISA and IO-APIC + * interrupts. Interrupts which have already been virtualized + * will just beget a silent -EPERM error since + * IPIPE_SYSTEM_MASK has been passed for them, that's ok. */ + + for (irq =3D 0; irq < NR_IRQS; irq++) + /* Fails for IPIPE_CRITICAL_IPI but that's ok. */ + ipipe_virtualize_irq(ipipe_root_domain, + irq, + (ipipe_irq_handler_t)&do_IRQ, + NULL, + &__ipipe_ack_irq, + IPIPE_STDROOT_MASK); + +#ifdef CONFIG_X86_LOCAL_APIC + /* Eventually allow these vectors to be reprogrammed. */ + ipipe_root_domain->irqs[IPIPE_SERVICE_IPI0].control &=3D ~IPIPE_SYSTEM_= MASK; + ipipe_root_domain->irqs[IPIPE_SERVICE_IPI1].control &=3D ~IPIPE_SYSTEM_= MASK; + ipipe_root_domain->irqs[IPIPE_SERVICE_IPI2].control &=3D ~IPIPE_SYSTEM_= MASK; + ipipe_root_domain->irqs[IPIPE_SERVICE_IPI3].control &=3D ~IPIPE_SYSTEM_= MASK; +#endif /* CONFIG_X86_LOCAL_APIC */ +} + +#ifdef CONFIG_SMP + +cpumask_t __ipipe_set_irq_affinity (unsigned irq, cpumask_t cpumask) +{ + cpumask_t oldmask =3D irq_desc[irq].affinity; + + if (irq_desc[irq].chip->set_affinity =3D=3D NULL) + return CPU_MASK_NONE; + + if (cpus_empty(cpumask)) + return oldmask; /* Return mask value -- no change. 
*/ + + cpus_and(cpumask,cpumask,cpu_online_map); + + if (cpus_empty(cpumask)) + return CPU_MASK_NONE; /* Error -- bad mask value or non-routable IRQ. = */ + + irq_desc[irq].chip->set_affinity(irq,cpumask); + + return oldmask; +} + +int fastcall __ipipe_send_ipi (unsigned ipi, cpumask_t cpumask) +{ + unsigned long flags; + int self; + + if (ipi !=3D IPIPE_SERVICE_IPI0 && + ipi !=3D IPIPE_SERVICE_IPI1 && + ipi !=3D IPIPE_SERVICE_IPI2 && + ipi !=3D IPIPE_SERVICE_IPI3) + return -EINVAL; + + local_irq_save_hw(flags); + + self =3D cpu_isset(ipipe_processor_id(),cpumask); + cpu_clear(ipipe_processor_id(), cpumask); + + if (!cpus_empty(cpumask)) + send_IPI_mask(cpumask,ipipe_apic_irq_vector(ipi)); + + if (self) + ipipe_trigger_irq(ipi); + + local_irq_restore_hw(flags); + + return 0; +} + +/* Always called with hw interrupts off. */ + +void __ipipe_do_critical_sync(unsigned irq, void *cookie) +{ + int cpu =3D ipipe_processor_id(); + + cpu_set(cpu, __ipipe_cpu_sync_map); + + /* Now we are in sync with the lock requestor running on another + CPU. Enter a spinning wait until he releases the global + lock. */ + spin_lock(&__ipipe_cpu_barrier); + + /* Got it. Now get out. */ + + if (__ipipe_cpu_sync) + /* Call the sync routine if any. */ + __ipipe_cpu_sync(); + + spin_unlock(&__ipipe_cpu_barrier); + + cpu_clear(cpu, __ipipe_cpu_sync_map); +} + +void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd) +{ + ipd->irqs[IPIPE_CRITICAL_IPI].acknowledge =3D &__ipipe_ack_apic; + ipd->irqs[IPIPE_CRITICAL_IPI].handler =3D &__ipipe_do_critical_sync; + ipd->irqs[IPIPE_CRITICAL_IPI].cookie =3D NULL; + /* Immediately handle in the current domain but *never* pass */ + ipd->irqs[IPIPE_CRITICAL_IPI].control =3D + IPIPE_HANDLE_MASK|IPIPE_STICKY_MASK|IPIPE_SYSTEM_MASK; +} + +#endif /* CONFIG_SMP */ + +/* ipipe_critical_enter() -- Grab the superlock excluding all CPUs + but the current one from a critical section. 
This lock is used when + we must enforce a global critical section for a single CPU in a + possibly SMP system whichever context the CPUs are running. */ + +unsigned long ipipe_critical_enter(void (*syncfn) (void)) +{ + unsigned long flags; + + local_irq_save_hw(flags); + +#ifdef CONFIG_SMP + if (unlikely(num_online_cpus() =3D=3D 1)) /* We might be running a SMP-= kernel on a UP box... */ + return flags; + + { + int cpu =3D ipipe_processor_id(); + cpumask_t lock_map; + + if (!cpu_test_and_set(cpu, __ipipe_cpu_lock_map)) { + while (cpu_test_and_set(BITS_PER_LONG - 1, __ipipe_cpu_lock_map)) { + int n =3D 0; + do { + cpu_relax(); + } while (++n < cpu); + } + + spin_lock(&__ipipe_cpu_barrier); + + __ipipe_cpu_sync =3D syncfn; + + /* Send the sync IPI to all processors but the current one. */ + send_IPI_allbutself(IPIPE_CRITICAL_VECTOR); + + cpus_andnot(lock_map, cpu_online_map, __ipipe_cpu_lock_map); + + while (!cpus_equal(__ipipe_cpu_sync_map, lock_map)) + cpu_relax(); + } + + atomic_inc(&__ipipe_critical_count); + } +#endif /* CONFIG_SMP */ + + return flags; +} + +/* ipipe_critical_exit() -- Release the superlock. */ + +void ipipe_critical_exit(unsigned long flags) +{ +#ifdef CONFIG_SMP + if (num_online_cpus() =3D=3D 1) + goto out; + + if (atomic_dec_and_test(&__ipipe_critical_count)) { + spin_unlock(&__ipipe_cpu_barrier); + + while (!cpus_empty(__ipipe_cpu_sync_map)) + cpu_relax(); + + cpu_clear(ipipe_processor_id(), __ipipe_cpu_lock_map); + cpu_clear(BITS_PER_LONG - 1, __ipipe_cpu_lock_map); + } +out: +#endif /* CONFIG_SMP */ + + local_irq_restore_hw(flags); +} + +static inline void __fixup_if(struct pt_regs *regs) +{ + if (!ipipe_root_domain_p) + return; + + /* + * Have the saved hw state look like the domain stall bit, so + * that __ipipe_unstall_iret_root() restores the proper + * pipeline state for the root stage upon exit. 
+ */ + + if (test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status))) + regs->eflags &=3D ~X86_EFLAGS_IF; + else + regs->eflags |=3D X86_EFLAGS_IF; +} + +/* Check the stall bit of the root domain to make sure the existing + preemption opportunity upon in-kernel resumption could be + exploited. In case a rescheduling could take place, the root stage + is stalled before the hw interrupts are re-enabled. This routine + must be called with hw interrupts off. */ + +asmlinkage int __ipipe_kpreempt_root(struct pt_regs regs) +{ + if (test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status))) + /* Root stage is stalled: rescheduling denied. */ + return 0; + + __ipipe_stall_root(); + local_irq_enable_hw_notrace(); + + return 1; /* Ok, may reschedule now. */ +} + +asmlinkage void __ipipe_unstall_iret_root(struct pt_regs regs) +{ + /* Emulate IRET's handling of the interrupt flag. */ + + local_irq_disable_hw(); + + /* Restore the software state as it used to be on kernel + entry. CAUTION: NMIs must *not* return through this + emulation. */ + + if (!(regs.eflags & X86_EFLAGS_IF)) { + if (!__test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(statu= s))) + trace_hardirqs_off(); + regs.eflags |=3D X86_EFLAGS_IF; + } else { + if (test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status))) { + trace_hardirqs_on(); + __clear_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)); + } + + /* Only sync virtual IRQs here, so that we don't recurse + indefinitely in case of an external interrupt flood. 
*/ + + if ((ipipe_root_cpudom_var(irqpend_himask) & IPIPE_IRQMASK_VIRT) !=3D = 0) + __ipipe_sync_pipeline(IPIPE_IRQMASK_VIRT); + } +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + ipipe_trace_end(0x8000000D); +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ +} + +asmlinkage int __ipipe_syscall_root(struct pt_regs regs) +{ + unsigned long flags; + + __fixup_if(&regs); + + /* This routine either returns: + 0 -- if the syscall is to be passed to Linux; + >0 -- if the syscall should not be passed to Linux, and no + tail work should be performed; + <0 -- if the syscall should not be passed to Linux but the + tail work has to be performed (for handling signals etc). */ + + if (__ipipe_syscall_watched_p(current, regs.orig_eax) && + __ipipe_event_monitored_p(IPIPE_EVENT_SYSCALL) && + __ipipe_dispatch_event(IPIPE_EVENT_SYSCALL,&regs) > 0) { + /* We might enter here over a non-root domain and exit + * over the root one as a result of the syscall + * (i.e. by recycling the register set of the current + * context across the migration), so we need to fixup + * the interrupt flag upon return too, so that + * __ipipe_unstall_iret_root() resets the correct + * stall bit on exit. */ + __fixup_if(&regs); + + if (ipipe_root_domain_p && !in_atomic()) { + /* Sync pending VIRQs before _TIF_NEED_RESCHED is tested.
*/ + local_irq_save_hw(flags); + if ((ipipe_root_cpudom_var(irqpend_himask) & IPIPE_IRQMASK_VIRT) !=3D= 0) + __ipipe_sync_pipeline(IPIPE_IRQMASK_VIRT); + local_irq_restore_hw(flags); + return -1; + } + return 1; + } + + return 0; +} + +static fastcall void do_machine_check_vector(struct pt_regs *regs, long = error_code) +{ +#ifdef CONFIG_X86_MCE + extern fastcall void (*machine_check_vector)(struct pt_regs *, long); + machine_check_vector(regs,error_code); +#endif /* CONFIG_X86_MCE */ +} + +fastcall void do_divide_error(struct pt_regs *regs, long error_code); +fastcall void do_overflow(struct pt_regs *regs, long error_code); +fastcall void do_bounds(struct pt_regs *regs, long error_code); +fastcall void do_invalid_op(struct pt_regs *regs, long error_code); +fastcall void do_coprocessor_segment_overrun(struct pt_regs *regs, long = error_code); +fastcall void do_invalid_TSS(struct pt_regs *regs, long error_code); +fastcall void do_segment_not_present(struct pt_regs *regs, long error_co= de); +fastcall void do_stack_segment(struct pt_regs *regs, long error_code); +fastcall void do_general_protection(struct pt_regs *regs, long error_cod= e); +fastcall void do_page_fault(struct pt_regs *regs, long error_code); +fastcall void do_spurious_interrupt_bug(struct pt_regs *regs, long error= _code); +fastcall void do_coprocessor_error(struct pt_regs *regs, long error_code= ); +fastcall void do_alignment_check(struct pt_regs *regs, long error_code);= +fastcall void do_simd_coprocessor_error(struct pt_regs *regs, long error= _code); +fastcall void do_iret_error(struct pt_regs *regs, long error_code); + +/* Work around genksyms's issue with over-qualification in decls. 
*/ + +typedef fastcall void __ipipe_exhandler(struct pt_regs *, long); + +typedef __ipipe_exhandler *__ipipe_exptr; + +static __ipipe_exptr __ipipe_std_extable[] =3D { + + [ex_do_divide_error] =3D &do_divide_error, + [ex_do_overflow] =3D &do_overflow, + [ex_do_bounds] =3D &do_bounds, + [ex_do_invalid_op] =3D &do_invalid_op, + [ex_do_coprocessor_segment_overrun] =3D &do_coprocessor_segment_overrun= , + [ex_do_invalid_TSS] =3D &do_invalid_TSS, + [ex_do_segment_not_present] =3D &do_segment_not_present, + [ex_do_stack_segment] =3D &do_stack_segment, + [ex_do_general_protection] =3D do_general_protection, + [ex_do_page_fault] =3D &do_page_fault, + [ex_do_spurious_interrupt_bug] =3D &do_spurious_interrupt_bug, + [ex_do_coprocessor_error] =3D &do_coprocessor_error, + [ex_do_alignment_check] =3D &do_alignment_check, + [ex_machine_check_vector] =3D &do_machine_check_vector, + [ex_do_simd_coprocessor_error] =3D &do_simd_coprocessor_error, + [ex_do_iret_error] =3D &do_iret_error, +}; + +#ifdef CONFIG_KGDB +#include <linux/kgdb.h> + +static int __ipipe_xlate_signo[] =3D { + + [ex_do_divide_error] =3D SIGFPE, + [ex_do_debug] =3D SIGTRAP, + [2] =3D -1, + [ex_do_int3] =3D SIGTRAP, + [ex_do_overflow] =3D SIGSEGV, + [ex_do_bounds] =3D SIGSEGV, + [ex_do_invalid_op] =3D SIGILL, + [ex_device_not_available] =3D -1, + [8] =3D -1, + [ex_do_coprocessor_segment_overrun] =3D SIGFPE, + [ex_do_invalid_TSS] =3D SIGSEGV, + [ex_do_segment_not_present] =3D SIGBUS, + [ex_do_stack_segment] =3D SIGBUS, + [ex_do_general_protection] =3D SIGSEGV, + [ex_do_page_fault] =3D SIGSEGV, + [ex_do_spurious_interrupt_bug] =3D -1, + [ex_do_coprocessor_error] =3D -1, + [ex_do_alignment_check] =3D SIGBUS, + [ex_machine_check_vector] =3D -1, + [ex_do_simd_coprocessor_error] =3D -1, + [20 ...
31] =3D -1, + [ex_do_iret_error] =3D SIGSEGV, +}; +#endif /* CONFIG_KGDB */ + +fastcall int __ipipe_handle_exception(struct pt_regs *regs, long error_c= ode, int vector) +{ + unsigned long flags; + + local_save_flags(flags); + + /* Track the hw interrupt state before calling the Linux + * exception handler, replicating it into the virtual mask. */ + + if (irqs_disabled_hw()) { + /* Do not trigger the alarm in ipipe_check_context() by using + * plain local_irq_disable(). */ + __ipipe_stall_root(); + trace_hardirqs_off(); + barrier(); + } + +#ifdef CONFIG_KGDB + /* catch exception KGDB is interested in over non-root domains */ + if (!ipipe_root_domain_p && + __ipipe_xlate_signo[vector] >=3D 0 && + !kgdb_handle_exception(vector, __ipipe_xlate_signo[vector], error_c= ode, regs)) { + local_irq_restore(flags); + return 1; + } +#endif /* CONFIG_KGDB */ + + if (!ipipe_trap_notify(vector, regs)) { + __ipipe_exptr handler =3D __ipipe_std_extable[vector]; + handler(regs,error_code); + local_irq_restore(flags); + __fixup_if(regs); + return 0; + } + + local_irq_restore(flags); + + return 1; +} + +fastcall int __ipipe_divert_exception(struct pt_regs *regs, int vector) +{ +#ifdef CONFIG_KGDB + /* catch int1 and int3 over non-root domains */ + if (!ipipe_root_domain_p && vector !=3D ex_device_not_available) { + unsigned int condition =3D 0; + if (vector =3D=3D 1) + get_debugreg(condition, 6); + if (!kgdb_handle_exception(vector, SIGTRAP, condition, regs)) + return 1; + } +#endif /* CONFIG_KGDB */ + + if (ipipe_trap_notify(vector, regs)) + return 1; + + __fixup_if(regs); + + return 0; +} + +/* __ipipe_handle_irq() -- IPIPE's generic IRQ handler. An optimistic + interrupt protection log is maintained here for each domain. Hw + interrupts are off on entry. 
*/ + +int __ipipe_handle_irq(struct pt_regs regs) +{ + struct ipipe_domain *this_domain, *next_domain; + unsigned irq =3D regs.orig_eax; + struct list_head *head, *pos; + int m_ack; + + if ((long)regs.orig_eax < 0) { + irq =3D ~irq; + m_ack =3D 0; + } else /* This is a self-triggered interrupt. */ + m_ack =3D 1; + + head =3D __ipipe_pipeline.next; + next_domain =3D list_entry(head, struct ipipe_domain, p_link); + if (likely(test_bit(IPIPE_WIRED_FLAG, &next_domain->irqs[irq].control))= ) { + if (!m_ack && next_domain->irqs[irq].acknowledge !=3D NULL) + next_domain->irqs[irq].acknowledge(irq); + if (likely(__ipipe_dispatch_wired(next_domain, irq))) { + goto finalize; + } else + goto finalize_nosync; + } + + this_domain =3D ipipe_current_domain; + + if (test_bit(IPIPE_STICKY_FLAG, &this_domain->irqs[irq].control)) + head =3D &this_domain->p_link; + + /* Ack the interrupt. */ + + pos =3D head; + + while (pos !=3D &__ipipe_pipeline) { + next_domain =3D list_entry(pos, struct ipipe_domain, p_link); + + /* + * For each domain handling the incoming IRQ, mark it + * as pending in its log. + */ + if (test_bit(IPIPE_HANDLE_FLAG, &next_domain->irqs[irq].control)) { + /* + * Domains that handle this IRQ are polled for + * acknowledging it by decreasing priority + * order. The interrupt must be made pending + * _first_ in the domain's status flags before + * the PIC is unlocked. + */ + __ipipe_set_irq_pending(next_domain, irq); + + if (!m_ack && next_domain->irqs[irq].acknowledge !=3D NULL) + m_ack =3D next_domain->irqs[irq].acknowledge(irq); + } + + /* + * If the domain does not want the IRQ to be passed + * down the interrupt pipe, exit the loop now. + */ + + if (!test_bit(IPIPE_PASS_FLAG, &next_domain->irqs[irq].control)) + break; + + pos =3D next_domain->p_link.next; + } + +finalize: + + /* Given our deferred dispatching model for regular IRQs, we + * only record CPU regs for the last timer interrupt, so that + * the timer handler charges CPU times properly. 
It is assumed + * that other interrupt handlers don't actually care for such + * information. */ + + if (irq =3D=3D __ipipe_tick_irq) + set_irq_regs(&regs); + + /* + * Now walk the pipeline, yielding control to the highest + * priority domain that has pending interrupt(s) or + * immediately to the current domain if the interrupt has been + * marked as 'sticky'. This search does not go beyond the + * current domain in the pipeline. + */ + + __ipipe_walk_pipeline(head); + +finalize_nosync: + + if (!ipipe_root_domain_p || + test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status))) + return 0; + +#ifdef CONFIG_SMP + /* + * Prevent a spurious rescheduling from being triggered on + * preemptible kernels along the way out through + * ret_from_intr. + */ + if ((long)regs.orig_eax < 0) + __set_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)); +#endif /* CONFIG_SMP */ + + return 1; +} + +int __ipipe_check_tickdev(const char *devname) +{ +#ifdef CONFIG_X86_LOCAL_APIC + if (!strcmp(devname, "lapic")) /* strcmp() yields 0 on a match: only the LAPIC device needs the LAPIC check */ + return __ipipe_check_lapic(); +#endif + + return 1; +} + +EXPORT_SYMBOL(__ipipe_tick_irq); +EXPORT_SYMBOL(ipipe_critical_enter); +EXPORT_SYMBOL(ipipe_critical_exit); +EXPORT_SYMBOL(ipipe_trigger_irq); +EXPORT_SYMBOL(ipipe_get_sysinfo); + +EXPORT_SYMBOL_GPL(irq_desc); +EXPORT_SYMBOL_GPL(__switch_to); +EXPORT_SYMBOL_GPL(show_stack); +EXPORT_PER_CPU_SYMBOL_GPL(init_tss); +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +EXPORT_SYMBOL(tasklist_lock); +#endif /* CONFIG_SMP || CONFIG_DEBUG_SPINLOCK */ +#ifdef CONFIG_SMP +EXPORT_PER_CPU_SYMBOL_GPL(cpu_tlbstate); +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_IPIPE_TRACE_MCOUNT +void notrace mcount(void); +EXPORT_SYMBOL(mcount); +#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ Index: linux-2.6.23/arch/i386/kernel/mcount.S =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /dev/null
+++ linux-2.6.23/arch/i386/kernel/mcount.S @@ -0,0 +1,32 @@ +/* + * linux/arch/i386/mcount.S + * + * Copyright (C) 2005, 2007 Jan Kiszka + */ + +.globl mcount +mcount: + cmpl $0,ipipe_trace_enable + je out + + pushl %ebp + movl %esp,%ebp + + pushl %eax + pushl %ecx + pushl %edx + + pushl $0 # no additional value (v) + movl (%ebp),%eax + movl 0x4(%ebp),%edx # __CALLER_ADDR0 + movl 0x4(%eax),%ecx # __CALLER_ADDR1 + movl $0,%eax # IPIPE_TRACE_FUNC + call __ipipe_trace + popl %eax + + popl %edx + popl %ecx + popl %eax + popl %ebp +out: + ret Index: linux-2.6.23/arch/i386/kernel/nmi.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/kernel/nmi.c +++ linux-2.6.23/arch/i386/kernel/nmi.c @@ -48,6 +48,10 @@ static unsigned int nmi_hz =3D HZ; =20 static DEFINE_PER_CPU(short, wd_enabled); =20 +static int default_nmi_watchdog_tick(struct pt_regs * regs, unsigned rea= son); +int (*nmi_watchdog_tick) (struct pt_regs * regs, unsigned reason) =3D &d= efault_nmi_watchdog_tick; +EXPORT_SYMBOL(nmi_watchdog_tick); + /* local prototypes */ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); =20 @@ -317,9 +321,7 @@ void touch_nmi_watchdog(void) } EXPORT_SYMBOL(touch_nmi_watchdog); =20 -extern void die_nmi(struct pt_regs *, const char *msg); - -__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) +static __kprobes int default_nmi_watchdog_tick (struct pt_regs * regs, u= nsigned reason) { =20 /* Index: linux-2.6.23/arch/i386/kernel/process.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/kernel/process.c +++ linux-2.6.23/arch/i386/kernel/process.c @@ -197,6 +197,7 @@ void 
cpu_idle(void) play_dead(); =20 __get_cpu_var(irq_stat).idle_timestamp =3D jiffies; + ipipe_suspend_domain(); idle(); } tick_nohz_restart_sched_tick(); Index: linux-2.6.23/arch/i386/kernel/smp.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/kernel/smp.c +++ linux-2.6.23/arch/i386/kernel/smp.c @@ -142,6 +142,9 @@ void __send_IPI_shortcut(unsigned int sh * to the APIC. */ unsigned int cfg; + unsigned long flags; + + local_irq_save_hw_cond(flags); =20 /* * Wait for idle. @@ -157,6 +160,8 @@ void __send_IPI_shortcut(unsigned int sh * Send the IPI. The write to APIC_ICR fires this off. */ apic_write_around(APIC_ICR, cfg); + + local_irq_restore_hw_cond(flags); } =20 void fastcall send_IPI_self(int vector) @@ -205,10 +210,10 @@ void send_IPI_mask_bitmask(cpumask_t cpu unsigned long mask =3D cpus_addr(cpumask)[0]; unsigned long flags; =20 - local_irq_save(flags); + local_irq_save_hw(flags); WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); __send_IPI_dest_field(mask, vector); - local_irq_restore(flags); + local_irq_restore_hw(flags); } =20 void send_IPI_mask_sequence(cpumask_t mask, int vector) @@ -222,14 +227,14 @@ void send_IPI_mask_sequence(cpumask_t ma * should be modified to do 1 message per cluster ID - mbligh */=20 =20 - local_irq_save(flags); + local_irq_save_hw(flags); for (query_cpu =3D 0; query_cpu < NR_CPUS; ++query_cpu) { if (cpu_isset(query_cpu, mask)) { __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); } } - local_irq_restore(flags); + local_irq_restore_hw(flags); } =20 #include <mach_ipi.h> /* must come after the send_IPI functions above fo= r inlining */ @@ -312,7 +317,9 @@ void leave_mm(unsigned long cpu) =20 fastcall void smp_invalidate_interrupt(struct pt_regs *regs) { - unsigned long cpu; + unsigned long cpu, flags; + + local_irq_save_hw_cond(flags); =20 cpu =3D
get_cpu(); =20 @@ -342,6 +349,7 @@ fastcall void smp_invalidate_interrupt(s smp_mb__after_clear_bit(); out: put_cpu_no_resched(); + local_irq_restore_hw_cond(flags); } =20 void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct= *mm, @@ -395,14 +403,17 @@ void flush_tlb_current_task(void) { struct mm_struct *mm =3D current->mm; cpumask_t cpu_mask; + unsigned long flags; =20 preempt_disable(); + local_irq_save_hw_cond(flags); cpu_mask =3D mm->cpu_vm_mask; cpu_clear(smp_processor_id(), cpu_mask); =20 local_flush_tlb(); if (!cpus_empty(cpu_mask)) flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); + local_irq_restore_hw_cond(flags); preempt_enable(); } =20 @@ -430,8 +441,11 @@ void flush_tlb_page(struct vm_area_struc { struct mm_struct *mm =3D vma->vm_mm; cpumask_t cpu_mask; + unsigned long flags; =20 preempt_disable(); + local_irq_save_hw_cond(flags); + cpu_mask =3D mm->cpu_vm_mask; cpu_clear(smp_processor_id(), cpu_mask); =20 @@ -442,6 +456,8 @@ void flush_tlb_page(struct vm_area_struc leave_mm(smp_processor_id()); } =20 + local_irq_restore_hw_cond(flags); + if (!cpus_empty(cpu_mask)) flush_tlb_others(cpu_mask, mm, va); =20 @@ -603,7 +619,7 @@ native_smp_call_function_mask(cpumask_t=20 =20 static void stop_this_cpu (void * dummy) { - local_irq_disable(); + local_irq_disable_hw(); /* * Remove this CPU: */ Index: linux-2.6.23/arch/i386/kernel/smpboot.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/kernel/smpboot.c +++ linux-2.6.23/arch/i386/kernel/smpboot.c @@ -1306,6 +1306,11 @@ void __init smp_intr_init(void) =20 /* IPI for generic function call */ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + +#ifdef CONFIG_IPIPE + /* IPI for critical lock */ + set_intr_gate(IPIPE_CRITICAL_VECTOR, ipipe_ipiX); +#endif } =20 /* Index: 
linux-2.6.23/arch/i386/kernel/time.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/kernel/time.c +++ linux-2.6.23/arch/i386/kernel/time.c @@ -165,11 +165,12 @@ irqreturn_t timer_interrupt(int irq, voi * This will also deassert NMI lines for the watchdog if run * on an 82489DX-based system. */ - spin_lock(&i8259A_lock); + unsigned long flags; + spin_lock_irqsave_cond(&i8259A_lock,flags); outb(0x0c, PIC_MASTER_OCW3); /* Ack the IRQ; AEOI will end it automatically. */ inb(PIC_MASTER_POLL); - spin_unlock(&i8259A_lock); + spin_unlock_irqrestore_cond(&i8259A_lock,flags); } #endif =20 Index: linux-2.6.23/arch/i386/kernel/traps.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/kernel/traps.c +++ linux-2.6.23/arch/i386/kernel/traps.c @@ -320,6 +320,9 @@ void show_registers(struct pt_regs *regs printk(KERN_EMERG "Process %.*s (pid: %d, ti=3D%p task=3D%p task.ti=3D%= p)", TASK_COMM_LEN, current->comm, current->pid, current_thread_info(), current, task_thread_info(current)); +#ifdef CONFIG_IPIPE + printk(KERN_EMERG "\nI-pipe domain %s", ipipe_current_domain->name); +#endif /* CONFIG_IPIPE */ /* * When in-kernel, we also print out the stack and code at the * time of the fault.. 
@@ -746,6 +749,8 @@ void __kprobes die_nmi(struct pt_regs *r do_exit(SIGSEGV); } =20 +EXPORT_SYMBOL(die_nmi); + static __kprobes void default_do_nmi(struct pt_regs * regs) { unsigned char reason =3D 0; @@ -788,17 +793,21 @@ static int ignore_nmis; =20 fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code) { - int cpu; + int cpu, cs; =20 nmi_enter(); =20 cpu =3D smp_processor_id(); =20 + cs =3D ipipe_disable_context_check(cpu); + ++nmi_count(cpu); =20 if (!ignore_nmis) default_do_nmi(regs); =20 + ipipe_restore_context_check(cpu, cs); + nmi_exit(); } =20 @@ -1092,13 +1101,16 @@ asmlinkage void math_state_restore(void) { struct thread_info *thread =3D current_thread_info(); struct task_struct *tsk =3D thread->task; + unsigned long flags; =20 + local_irq_save_hw_cond(flags); clts(); /* Allow maths ops (or we recurse) */ if (!tsk_used_math(tsk)) init_fpu(tsk); restore_fpu(tsk); thread->status |=3D TS_USEDFPU; /* So we fnsave on switch_to() */ tsk->fpu_counter++; + local_irq_restore_hw_cond(flags); } EXPORT_SYMBOL_GPL(math_state_restore); =20 Index: linux-2.6.23/arch/i386/kernel/vm86.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/kernel/vm86.c +++ linux-2.6.23/arch/i386/kernel/vm86.c @@ -148,12 +148,14 @@ struct pt_regs * fastcall save_v86_state do_exit(SIGSEGV); } =20 + local_irq_disable_hw_cond(); tss =3D &per_cpu(init_tss, get_cpu()); current->thread.esp0 =3D current->thread.saved_esp0; current->thread.sysenter_cs =3D __KERNEL_CS; load_esp0(tss, &current->thread); current->thread.saved_esp0 =3D 0; put_cpu(); + local_irq_enable_hw_cond(); =20 ret =3D KVM86->regs32; =20 @@ -324,12 +326,14 @@ static void do_sys_vm86(struct kernel_vm tsk->thread.saved_fs =3D info->regs32->xfs; savesegment(gs, tsk->thread.saved_gs); =20 + local_irq_disable_hw_cond(); tss =3D
&per_cpu(init_tss, get_cpu()); tsk->thread.esp0 =3D (unsigned long) &info->VM86_TSS_ESP0; if (cpu_has_sep) tsk->thread.sysenter_cs =3D 0; load_esp0(tss, &tsk->thread); put_cpu(); + local_irq_enable_hw_cond(); =20 tsk->thread.screen_bitmap =3D info->screen_bitmap; if (info->flags & VM86_SCREEN_BITMAP) Index: linux-2.6.23/arch/i386/lib/mmx.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/lib/mmx.c +++ linux-2.6.23/arch/i386/lib/mmx.c @@ -31,7 +31,7 @@ void *_mmx_memcpy(void *to, const void * void *p; int i; =20 - if (unlikely(in_interrupt())) + if (unlikely(!ipipe_root_domain_p || in_interrupt())) return __memcpy(to, from, len); =20 p =3D to; Index: linux-2.6.23/arch/i386/mach-visws/visws_apic.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/mach-visws/visws_apic.c +++ linux-2.6.23/arch/i386/mach-visws/visws_apic.c @@ -28,7 +28,7 @@ #include "irq_vectors.h" =20 =20 -static DEFINE_SPINLOCK(cobalt_lock); +static IPIPE_DEFINE_SPINLOCK(cobalt_lock); =20 /* * Set the given Cobalt APIC Redirection Table entry to point Index: linux-2.6.23/arch/i386/mach-voyager/voyager_basic.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/mach-voyager/voyager_basic.c +++ linux-2.6.23/arch/i386/mach-voyager/voyager_basic.c @@ -185,20 +185,20 @@ voyager_timer_interrupt(void) * pointy. 
*/ __u16 val; =20 - spin_lock(&i8253_lock); + spin_lock_irqsave(&i8253_lock); =09 outb_p(0x00, 0x43); val =3D inb_p(0x40); val |=3D inb(0x40) << 8; - spin_unlock(&i8253_lock); + spin_unlock_irqrestore(&i8253_lock); =20 if(val > LATCH) { printk("\nVOYAGER: countdown timer value too high (%d), resetting\n\n= ", val); - spin_lock(&i8253_lock); + spin_lock_irqsave(&i8253_lock); outb(0x34,0x43); outb_p(LATCH & 0xff , 0x40); /* LSB */ outb(LATCH >> 8 , 0x40); /* MSB */ - spin_unlock(&i8253_lock); + spin_unlock_irqrestore(&i8253_lock); } } #ifdef CONFIG_SMP Index: linux-2.6.23/arch/i386/mm/fault.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/mm/fault.c +++ linux-2.6.23/arch/i386/mm/fault.c @@ -311,6 +311,8 @@ fastcall void __kprobes do_page_fault(st /* get the address */ address =3D read_cr2(); =20 + local_irq_enable_hw_cond(); + tsk =3D current; =20 si_code =3D SEGV_MAPERR; @@ -655,3 +657,22 @@ void vmalloc_sync_all(void) start =3D address + PGDIR_SIZE; } } + +#ifdef CONFIG_IPIPE +void __ipipe_pin_range_globally(unsigned long start, unsigned long end) +{ + unsigned long next, addr =3D start; + + do { + unsigned long flags; + struct page *page; + + next =3D pgd_addr_end(addr, end); + spin_lock_irqsave(&pgd_lock, flags); + for (page =3D pgd_list; page; page =3D (struct page *)page->index) + vmalloc_sync_one(page_address(page), addr); + spin_unlock_irqrestore(&pgd_lock, flags); + + } while (addr =3D next, addr !=3D end); +} +#endif /* CONFIG_IPIPE */ Index: linux-2.6.23/drivers/pci/htirq.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/drivers/pci/htirq.c +++ linux-2.6.23/drivers/pci/htirq.c @@ -21,7 +21,7 
@@ * With multiple simultaneous hypertransport irq devices it might pay * to make this more fine grained. But start with simple, stupid, and c= orrect. */ -static DEFINE_SPINLOCK(ht_irq_lock); +static IPIPE_DEFINE_SPINLOCK(ht_irq_lock); =20 struct ht_irq_cfg { struct pci_dev *dev; Index: linux-2.6.23/include/asm-i386/apic.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/asm-i386/apic.h +++ linux-2.6.23/include/asm-i386/apic.h @@ -79,7 +79,13 @@ int get_physical_broadcast(void); # define apic_write_around(x,y) apic_write_atomic((x),(y)) #endif =20 +#ifdef CONFIG_IPIPE +#define ack_APIC_irq() do { } while(0) +static inline void __ack_APIC_irq(void) +#else /* !CONFIG_IPIPE */ +#define __ack_APIC_irq() ack_APIC_irq() static inline void ack_APIC_irq(void) +#endif /* CONFIG_IPIPE */ { /* * ack_APIC_irq() actually gets compiled as a single instruction: Index: linux-2.6.23/include/asm-i386/hw_irq.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/asm-i386/hw_irq.h +++ linux-2.6.23/include/asm-i386/hw_irq.h @@ -40,6 +40,13 @@ fastcall void error_interrupt(void); fastcall void spurious_interrupt(void); fastcall void thermal_interrupt(void); #define platform_legacy_irq(irq) ((irq) < 16) +#ifdef CONFIG_IPIPE +fastcall void ipipe_ipi0(void); +fastcall void ipipe_ipi1(void); +fastcall void ipipe_ipi2(void); +fastcall void ipipe_ipi3(void); +fastcall void ipipe_ipiX(void); +#endif #endif =20 void disable_8259A_irq(unsigned int irq); Index: linux-2.6.23/include/asm-i386/i8253.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= 
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/asm-i386/i8253.h +++ linux-2.6.23/include/asm-i386/i8253.h @@ -8,7 +8,7 @@ #define PIT_CH0 0x40 #define PIT_CH2 0x42 =20 -extern spinlock_t i8253_lock; +extern ipipe_spinlock_t i8253_lock; =20 extern struct clock_event_device *global_clock_event; =20 Index: linux-2.6.23/include/asm-i386/i8259.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/asm-i386/i8259.h +++ linux-2.6.23/include/asm-i386/i8259.h @@ -7,7 +7,7 @@ extern unsigned int cached_irq_mask; #define cached_master_mask (__byte(0, cached_irq_mask)) #define cached_slave_mask (__byte(1, cached_irq_mask)) =20 -extern spinlock_t i8259A_lock; +extern ipipe_spinlock_t i8259A_lock; =20 extern void init_8259A(int auto_eoi); extern void enable_8259A_irq(unsigned int irq); Index: linux-2.6.23/include/asm-i386/ipipe.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /dev/null +++ linux-2.6.23/include/asm-i386/ipipe.h @@ -0,0 +1,205 @@ +/* -*- linux-c -*- + * include/asm-i386/ipipe.h + * + * Copyright (C) 2002-2005 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modif= y + * it under the terms of the GNU General Public License as published b= y + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 0213= 9, + * USA; either version 2 of the License, or (at your option) any later= + * version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-130= 7, USA. + */ + +#ifndef __I386_IPIPE_H +#define __I386_IPIPE_H + +#ifdef CONFIG_IPIPE + +#define IPIPE_ARCH_STRING "1.10-07" +#define IPIPE_MAJOR_NUMBER 1 +#define IPIPE_MINOR_NUMBER 10 +#define IPIPE_PATCH_NUMBER 7 + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +#include + +#define ipipe_processor_id() raw_smp_processor_id() + +#define prepare_arch_switch(next) \ +do { \ + ipipe_schedule_notify(current, next); \ + local_irq_disable_hw(); \ +} while(0) + +#define task_hijacked(p) \ + ({ int x =3D !ipipe_root_domain_p; \ + __clear_bit(IPIPE_SYNC_FLAG, &ipipe_root_cpudom_var(status)); \ + local_irq_enable_hw(); x; }) + +struct ipipe_domain; + +struct ipipe_sysinfo { + + int ncpus; /* Number of CPUs on board */ + u64 cpufreq; /* CPU frequency (in Hz) */ + + /* Arch-dependent block */ + + struct { + unsigned tmirq; /* Timer tick IRQ */ + u64 tmfreq; /* Timer frequency */ + } archdep; +}; + +#define ipipe_read_tsc(t) __asm__ __volatile__("rdtsc" : "=3DA" (t)) +#define ipipe_cpu_freq() ({ unsigned long long __freq =3D cpu_has_tsc?(1= 000LL * cpu_khz):CLOCK_TICK_RATE; __freq; }) + +#define ipipe_tsc2ns(t) \ +({ \ + unsigned long long delta =3D (t)*1000; \ + do_div(delta, cpu_khz/1000+1); \ + (unsigned long)delta; \ +}) + +#define ipipe_tsc2us(t) \ +({ \ + unsigned long long delta =3D (t); \ + do_div(delta, cpu_khz/1000+1); \ + (unsigned long)delta; \ +}) + +/* Private interface -- Internal use only */ + +#define __ipipe_check_platform() do { } while(0) +#define __ipipe_init_platform() do { } 
while(0) +#define __ipipe_enable_irq(irq) irq_desc[irq].chip->enable(irq) +#define __ipipe_disable_irq(irq) irq_desc[irq].chip->disable(irq) + +#ifdef CONFIG_SMP +void __ipipe_hook_critical_ipi(struct ipipe_domain *ipd); +#else +#define __ipipe_hook_critical_ipi(ipd) do { } while(0) +#endif + +#define __ipipe_disable_irqdesc(ipd, irq) do { } while(0) + +void __ipipe_enable_irqdesc(struct ipipe_domain *ipd, unsigned irq); + +void __ipipe_enable_pipeline(void); + +int __ipipe_handle_irq(struct pt_regs regs); + +void __ipipe_do_critical_sync(unsigned irq, void *cookie); + +extern int __ipipe_tick_irq; + +#define __ipipe_call_root_xirq_handler(ipd,irq) \ + __asm__ __volatile__ ("pushfl\n\t" \ + "pushl %%cs\n\t" \ + "pushl $1f\n\t" \ + "pushl %%eax\n\t" \ + "pushl %%fs\n\t" \ + "pushl %%es\n\t" \ + "pushl %%ds\n\t" \ + "pushl %%eax\n\t" \ + "pushl %%ebp\n\t" \ + "pushl %%edi\n\t" \ + "pushl %%esi\n\t" \ + "pushl %%edx\n\t" \ + "pushl %%ecx\n\t" \ + "pushl %%ebx\n\t" \ + "movl %%esp,%%eax\n\t" \ + "call *%1\n\t" \ + "jmp ret_from_intr\n\t" \ + "1: cli\n" \ + : /* no output */ \ + : "a" (~irq), "m" ((ipd)->irqs[irq].handler)) + +#define __ipipe_call_root_virq_handler(ipd,irq) \ + __asm__ __volatile__ ("pushfl\n\t" \ + "pushl %%cs\n\t" \ + "pushl $__virq_end\n\t" \ + "pushl $-1\n\t" \ + "pushl %%fs\n\t" \ + "pushl %%es\n\t" \ + "pushl %%ds\n\t" \ + "pushl %%eax\n\t" \ + "pushl %%ebp\n\t" \ + "pushl %%edi\n\t" \ + "pushl %%esi\n\t" \ + "pushl %%edx\n\t" \ + "pushl %%ecx\n\t" \ + "pushl %%ebx\n\t" \ + "pushl %2\n\t" \ + "pushl %%eax\n\t" \ + "call *%1\n\t" \ + "addl $8,%%esp\n\t" \ + : /* no output */ \ + : "a" (irq), "m" ((ipd)->irqs[irq].handler), "r" ((ipd)->irqs[irq].c= ookie)) + +#define __ipipe_finalize_root_virq_handler() \ + __asm__ __volatile__ ("jmp ret_from_intr\n\t" \ + "__virq_end: cli\n" \ + : /* no output */ \ + : /* no input */) + +static inline unsigned long __ipipe_ffnz(unsigned long ul) +{ + __asm__("bsrl %1, %0":"=3Dr"(ul) + : "r"(ul)); + return ul; +} + 
+/* When running handlers, enable hw interrupts for all domains but the + * one heading the pipeline, so that IRQs can never be significantly + * deferred for the latter. */ +#define __ipipe_run_isr(ipd, irq) \ +do { \ + local_irq_enable_nohead(ipd); \ + if (ipd =3D=3D ipipe_root_domain) { \ + if (likely(!ipipe_virtual_irq_p(irq))) { \ + __ipipe_call_root_xirq_handler(ipd,irq); \ + } else { \ + irq_enter(); \ + __ipipe_call_root_virq_handler(ipd,irq); \ + irq_exit(); \ + __ipipe_finalize_root_virq_handler(); \ + } \ + } else { \ + __clear_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \ + ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); \ + __set_bit(IPIPE_SYNC_FLAG, &ipipe_cpudom_var(ipd, status)); \ + } \ + local_irq_disable_nohead(ipd); \ +} while(0) + +#endif /* __ASSEMBLY__ */ + +#define __ipipe_syscall_watched_p(p, sc) \ + (((p)->flags & PF_EVNOTIFY) || (unsigned long)sc >=3D NR_syscalls) + +int __ipipe_check_lapic(void); + +int __ipipe_check_tickdev(const char *devname); + +#else /* !CONFIG_IPIPE */ + +#define task_hijacked(p) 0 + +#endif /* CONFIG_IPIPE */ + +#endif /* !__I386_IPIPE_H */ Index: linux-2.6.23/include/asm-i386/ipipe_base.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /dev/null +++ linux-2.6.23/include/asm-i386/ipipe_base.h @@ -0,0 +1,182 @@ +/* -*- linux-c -*- + * include/asm-i386/ipipe_base.h + * + * Copyright (C) 2007 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,= + * USA; either version 2 of the License, or (at your option) any later + * version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,= USA. + */ + +#ifndef __I386_IPIPE_BASE_H +#define __I386_IPIPE_BASE_H + +#include +#include + +#ifdef CONFIG_X86_LOCAL_APIC +/* System interrupts are mapped beyond the last defined external IRQ + * number. */ +#define IPIPE_FIRST_APIC_IRQ NR_IRQS +#define IPIPE_NR_XIRQS (NR_IRQS + 256 - FIRST_SYSTEM_VECTOR) +#define ipipe_apic_irq_vector(irq) ((irq) - IPIPE_FIRST_APIC_IRQ + FIRS= T_SYSTEM_VECTOR) +#define ipipe_apic_vector_irq(vec) ((vec) - FIRST_SYSTEM_VECTOR + IPIPE= _FIRST_APIC_IRQ) +/* If the APIC is enabled, then we expose four service vectors in the + * APIC space which are freely available to domains. */ +#define IPIPE_SERVICE_VECTOR0 0xf5 +#define IPIPE_SERVICE_IPI0 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR0) +#define IPIPE_SERVICE_VECTOR1 0xf6 +#define IPIPE_SERVICE_IPI1 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR1) +#define IPIPE_SERVICE_VECTOR2 0xf7 +#define IPIPE_SERVICE_IPI2 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR2) +#define IPIPE_SERVICE_VECTOR3 0xf8 +#define IPIPE_SERVICE_IPI3 ipipe_apic_vector_irq(IPIPE_SERVICE_VECTOR3) +#define IPIPE_CRITICAL_VECTOR 0xf9 /* SMP-only: used by ipipe_critical_= enter/exit() */ +#define IPIPE_CRITICAL_IPI ipipe_apic_vector_irq(IPIPE_CRITICAL_VECT= OR) +#else /* !CONFIG_X86_LOCAL_APIC */ +#define IPIPE_NR_XIRQS NR_IRQS +#endif /* !CONFIG_X86_LOCAL_APIC */ + +#define IPIPE_IRQ_ISHIFT 5 /* 2^5 for 32bits arch. */ + +#define ex_do_divide_error 0 +#define ex_do_debug 1 +/* NMI not pipelined. 
*/ +#define ex_do_int3 3 +#define ex_do_overflow 4 +#define ex_do_bounds 5 +#define ex_do_invalid_op 6 +#define ex_device_not_available 7 +/* Double fault not pipelined. */ +#define ex_do_coprocessor_segment_overrun 9 +#define ex_do_invalid_TSS 10 +#define ex_do_segment_not_present 11 +#define ex_do_stack_segment 12 +#define ex_do_general_protection 13 +#define ex_do_page_fault 14 +#define ex_do_spurious_interrupt_bug 15 +#define ex_do_coprocessor_error 16 +#define ex_do_alignment_check 17 +#define ex_machine_check_vector 18 +#define ex_do_simd_coprocessor_error 19 +#define ex_do_iret_error 32 + +/* IDT fault vectors */ +#define IPIPE_NR_FAULTS 33 /* 32 from IDT + iret_error */ +/* Pseudo-vectors used for kernel events */ +#define IPIPE_FIRST_EVENT IPIPE_NR_FAULTS +#define IPIPE_EVENT_SYSCALL (IPIPE_FIRST_EVENT) +#define IPIPE_EVENT_SCHEDULE (IPIPE_FIRST_EVENT + 1) +#define IPIPE_EVENT_SIGWAKE (IPIPE_FIRST_EVENT + 2) +#define IPIPE_EVENT_SETSCHED (IPIPE_FIRST_EVENT + 3) +#define IPIPE_EVENT_INIT (IPIPE_FIRST_EVENT + 4) +#define IPIPE_EVENT_EXIT (IPIPE_FIRST_EVENT + 5) +#define IPIPE_EVENT_CLEANUP (IPIPE_FIRST_EVENT + 6) +#define IPIPE_LAST_EVENT IPIPE_EVENT_CLEANUP +#define IPIPE_NR_EVENTS (IPIPE_LAST_EVENT + 1) + +#ifndef __ASSEMBLY__ + +#include + +#ifdef CONFIG_SMP + +#define GET_ROOT_STATUS_ADDR \ + "pushfl; cli;" \ + "movl %%fs:per_cpu__this_cpu_off, %%eax;" \ + "lea per_cpu__ipipe_percpu_darray(%%eax), %%eax;" +#define PUT_ROOT_STATUS_ADDR "popfl;" + +static inline void __ipipe_stall_root(void) +{ + __asm__ __volatile__(GET_ROOT_STATUS_ADDR + LOCK_PREFIX + "btsl $0,(%%eax);" + PUT_ROOT_STATUS_ADDR + : : : "eax", "memory"); +} + +static inline unsigned long __ipipe_test_and_stall_root(void) +{ + int oldbit; + + __asm__ __volatile__(GET_ROOT_STATUS_ADDR + LOCK_PREFIX + "btsl $0,(%%eax);" + "sbbl %0,%0;" + PUT_ROOT_STATUS_ADDR + :"=3Dr" (oldbit) + : : "eax", "memory"); + return oldbit; +} + +static inline unsigned long __ipipe_test_root(void) +{ + int oldbit; + 
+ __asm__ __volatile__(GET_ROOT_STATUS_ADDR + "btl $0,(%%eax);" + "sbbl %0,%0;" + PUT_ROOT_STATUS_ADDR + :"=3Dr" (oldbit) + : : "eax"); + return oldbit; +} + +#else /* ! CONFIG_SMP */ + +#if __GNUC__ >=3D 4 +/* Alias to ipipe_root_cpudom_var(status) */ +extern unsigned long __ipipe_root_status; +#else +extern unsigned long *const __ipipe_root_status_addr; +#define __ipipe_root_status (*__ipipe_root_status_addr) +#endif + +static inline void __ipipe_stall_root(void) +{ + volatile unsigned long *p =3D &__ipipe_root_status; + __asm__ __volatile__("btsl $0,%0;" + :"+m" (*p) : : "memory"); +} + +static inline unsigned long __ipipe_test_and_stall_root(void) +{ + volatile unsigned long *p =3D &__ipipe_root_status; + int oldbit; + + __asm__ __volatile__("btsl $0,%1;" + "sbbl %0,%0;" + :"=3Dr" (oldbit), "+m" (*p) + : : "memory"); + return oldbit; +} + +static inline unsigned long __ipipe_test_root(void) +{ + volatile unsigned long *p =3D &__ipipe_root_status; + int oldbit; + + __asm__ __volatile__("btl $0,%1;" + "sbbl %0,%0;" + :"=3Dr" (oldbit) + :"m" (*p)); + return oldbit; +} + +#endif /* CONFIG_SMP */ + +#endif /* !__ASSEMBLY__ */ + +#endif /* !__I386_IPIPE_BASE_H */ Index: linux-2.6.23/include/asm-i386/irqflags.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/asm-i386/irqflags.h +++ linux-2.6.23/include/asm-i386/irqflags.h @@ -12,32 +12,62 @@ #include =20 #ifndef __ASSEMBLY__ + +#include + static inline unsigned long native_save_fl(void) { unsigned long f; +#ifdef CONFIG_IPIPE + f =3D (!__ipipe_test_root()) << 9; + barrier(); +#else asm volatile("pushfl ; popl %0":"=3Dg" (f): /* no input */); +#endif return f; } =20 static inline void native_restore_fl(unsigned long f) { +#ifdef CONFIG_IPIPE + barrier(); + __ipipe_restore_root(!(f & X86_EFLAGS_IF)); +#else asm volatile("pushl %0 ; 
popfl": /* no output */ :"g" (f) :"memory", "cc"); +#endif } =20 static inline void native_irq_disable(void) { +#ifdef CONFIG_IPIPE + ipipe_check_context(ipipe_root_domain); + __ipipe_stall_root(); + barrier(); +#else asm volatile("cli": : :"memory"); +#endif } =20 static inline void native_irq_enable(void) { +#ifdef CONFIG_IPIPE + barrier(); + __ipipe_unstall_root(); +#else asm volatile("sti": : :"memory"); +#endif } =20 static inline void native_safe_halt(void) { +#ifdef CONFIG_IPIPE + __ipipe_unstall_root(); +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + ipipe_trace_end(0x8000000E); +#endif +#endif asm volatile("sti; hlt": : :"memory"); } =20 @@ -95,16 +125,31 @@ static inline void halt(void) */ static inline unsigned long __raw_local_irq_save(void) { +#ifdef CONFIG_IPIPE + unsigned long flags =3D (!__ipipe_test_and_stall_root()) << 9; + barrier(); +#else unsigned long flags =3D __raw_local_save_flags(); - raw_local_irq_disable(); - +#endif return flags; } =20 #else + +#ifdef CONFIG_IPIPE +#define DISABLE_INTERRUPTS(clobbers) PER_CPU(ipipe_percpu_darray, %eax);= btsl $0,(%eax); sti +#define ENABLE_INTERRUPTS(clobbers) call __ipipe_unstall_root +#define ENABLE_INTERRUPTS_HW_COND sti +#define DISABLE_INTERRUPTS_HW(clobbers) cli +#define ENABLE_INTERRUPTS_HW(clobbers) sti +#else /* !CONFIG_IPIPE */ #define DISABLE_INTERRUPTS(clobbers) cli #define ENABLE_INTERRUPTS(clobbers) sti +#define ENABLE_INTERRUPTS_HW_COND +#define DISABLE_INTERRUPTS_HW(clobbers) DISABLE_INTERRUPTS(clobbers) +#define ENABLE_INTERRUPTS_HW(clobbers) ENABLE_INTERRUPTS(clobbers) +#endif /* !CONFIG_IPIPE */ #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit #define INTERRUPT_RETURN iret #define GET_CR0_INTO_EAX movl %cr0, %eax @@ -115,8 +160,10 @@ static inline unsigned long __raw_local_ #define raw_local_save_flags(flags) \ do { (flags) =3D __raw_local_save_flags(); } while (0) =20 -#define raw_local_irq_save(flags) \ - do { (flags) =3D __raw_local_irq_save(); } while (0) +#define raw_local_irq_save(flags) do 
{ \ + ipipe_check_context(ipipe_root_domain); \ + (flags) =3D __raw_local_irq_save(); \ + } while (0) =20 static inline int raw_irqs_disabled_flags(unsigned long flags) { @@ -129,6 +176,70 @@ static inline int raw_irqs_disabled(void =20 return raw_irqs_disabled_flags(flags); } + +static inline unsigned long raw_mangle_irq_bits(int virt, unsigned long = real) +{ + /* Merge virtual and real interrupt mask bits into a single + 32bit word. */ + return (real & ~(1 << 31)) | ((virt !=3D 0) << 31); +} + +static inline int raw_demangle_irq_bits(unsigned long *x) +{ + int virt =3D (*x & (1 << 31)) !=3D 0; + *x &=3D ~(1L << 31); + return virt; +} + +#define local_irq_disable_hw_notrace() \ + __asm__ __volatile__("cli": : :"memory") +#define local_irq_enable_hw_notrace() \ + __asm__ __volatile__("sti": : :"memory") +#define local_irq_save_hw_notrace(x) \ + __asm__ __volatile__("pushfl ; popl %0 ; cli":"=3Dg" (x): /* no input *= / :"memory") +#define local_irq_restore_hw_notrace(x) \ + __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memo= ry", "cc") + +#define local_save_flags_hw(x) __asm__ __volatile__("pushfl ; popl %0":"= =3Dg" (x): /* no input */) +#define irqs_disabled_hw() \ + ({ \ + unsigned long x; \ + local_save_flags_hw(x); \ + raw_irqs_disabled_flags(x); \ + }) + +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF +#define local_irq_disable_hw() do { \ + if (!irqs_disabled_hw()) { \ + local_irq_disable_hw_notrace(); \ + ipipe_trace_begin(0x80000000); \ + } \ + } while (0) +#define local_irq_enable_hw() do { \ + if (irqs_disabled_hw()) { \ + ipipe_trace_end(0x80000000); \ + local_irq_enable_hw_notrace(); \ + } \ + } while (0) +#define local_irq_save_hw(x) do { \ + local_save_flags_hw(x); \ + if (!raw_irqs_disabled_flags(x)) { \ + local_irq_disable_hw_notrace(); \ + ipipe_trace_begin(0x80000001); \ + } \ + } while (0) +#define local_irq_restore_hw(x) do { \ + if (!raw_irqs_disabled_flags(x)) \ + ipipe_trace_end(0x80000001); \ + local_irq_restore_hw_notrace(x); \ + } 
while (0) +#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */ +#define local_irq_save_hw(x) local_irq_save_hw_notrace(x) +#define local_irq_restore_hw(x) local_irq_restore_hw_notrace(x) +#define local_irq_enable_hw() local_irq_enable_hw_notrace() +#define local_irq_disable_hw() local_irq_disable_hw_notrace() +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ + #endif /* __ASSEMBLY__ */ =20 /* Index: linux-2.6.23/include/asm-i386/mmu_context.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/asm-i386/mmu_context.h +++ linux-2.6.23/include/asm-i386/mmu_context.h @@ -79,8 +79,11 @@ static inline void switch_mm(struct mm_s =20 #define activate_mm(prev, next) \ do { \ + unsigned long flags; \ paravirt_activate_mm(prev, next); \ + local_irq_save_hw_cond(flags); \ switch_mm((prev),(next),NULL); \ + local_irq_restore_hw_cond(flags); \ } while(0); =20 #endif Index: linux-2.6.23/include/asm-i386/nmi.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/asm-i386/nmi.h +++ linux-2.6.23/include/asm-i386/nmi.h @@ -29,7 +29,8 @@ extern void setup_apic_nmi_watchdog (voi extern void stop_apic_nmi_watchdog (void *); extern void disable_timer_nmi_watchdog(void); extern void enable_timer_nmi_watchdog(void); -extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason); +extern int (*nmi_watchdog_tick)(struct pt_regs * regs, unsigned reason);= +extern void die_nmi(struct pt_regs *, const char *msg); =20 extern atomic_t nmi_active; extern unsigned int nmi_watchdog; Index: linux-2.6.23/include/asm-i386/spinlock.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= 
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/asm-i386/spinlock.h +++ linux-2.6.23/include/asm-i386/spinlock.h @@ -54,7 +54,7 @@ static inline void __raw_spin_lock(raw_s * NOTE: there's an irqs-on section here, which normally would have to b= e * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use this variant. */ -#ifndef CONFIG_PROVE_LOCKING +#if !defined(CONFIG_PROVE_LOCKING) && !defined(CONFIG_IPIPE) static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned = long flags) { asm volatile( Index: linux-2.6.23/include/linux/clockchips.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/linux/clockchips.h +++ linux-2.6.23/include/linux/clockchips.h @@ -91,6 +91,7 @@ struct clock_event_device { struct list_head list; enum clock_event_mode mode; ktime_t next_event; + int64_t delta; }; =20 /* Index: linux-2.6.23/include/linux/hardirq.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/linux/hardirq.h +++ linux-2.6.23/include/linux/hardirq.h @@ -146,7 +146,7 @@ extern void irq_enter(void); */ extern void irq_exit(void); =20 -#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) -#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) +#define nmi_enter() do { if (ipipe_root_domain_p) { lockdep_off(); __ir= q_enter(); } } while (0) +#define nmi_exit() do { if (ipipe_root_domain_p) { __irq_exit(); lockde= p_on(); } } while (0) =20 #endif /* LINUX_HARDIRQ_H */ Index: linux-2.6.23/include/linux/ipipe.h 
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /dev/null +++ linux-2.6.23/include/linux/ipipe.h @@ -0,0 +1,552 @@ +/* -*- linux-c -*- + * include/linux/ipipe.h + * + * Copyright (C) 2002-2007 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,= + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,= USA. 
+ */ + +#ifndef __LINUX_IPIPE_H +#define __LINUX_IPIPE_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_IPIPE + +#define IPIPE_VERSION_STRING IPIPE_ARCH_STRING +#define IPIPE_RELEASE_NUMBER ((IPIPE_MAJOR_NUMBER << 16) | \ + (IPIPE_MINOR_NUMBER << 8) | \ + (IPIPE_PATCH_NUMBER)) + +#ifndef BROKEN_BUILTIN_RETURN_ADDRESS +#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_addre= ss(0)) +#define __BUILTIN_RETURN_ADDRESS1 ((unsigned long)__builtin_return_addre= ss(1)) +#endif /* !BUILTIN_RETURN_ADDRESS */ + +#define IPIPE_ROOT_PRIO 100 +#define IPIPE_ROOT_ID 0 +#define IPIPE_ROOT_NPTDKEYS 4 /* Must be <=3D BITS_PER_LONG */ + +#define IPIPE_RESET_TIMER 0x1 +#define IPIPE_GRAB_TIMER 0x2 + +/* Global domain flags */ +#define IPIPE_SPRINTK_FLAG 0 /* Synchronous printk() allowed */ +#define IPIPE_AHEAD_FLAG 1 /* Domain always heads the pipeline */ + +/* Interrupt control bits */ +#define IPIPE_HANDLE_FLAG 0 +#define IPIPE_PASS_FLAG 1 +#define IPIPE_ENABLE_FLAG 2 +#define IPIPE_DYNAMIC_FLAG IPIPE_HANDLE_FLAG +#define IPIPE_STICKY_FLAG 3 +#define IPIPE_SYSTEM_FLAG 4 +#define IPIPE_LOCK_FLAG 5 +#define IPIPE_WIRED_FLAG 6 +#define IPIPE_EXCLUSIVE_FLAG 7 + +#define IPIPE_HANDLE_MASK (1 << IPIPE_HANDLE_FLAG) +#define IPIPE_PASS_MASK (1 << IPIPE_PASS_FLAG) +#define IPIPE_ENABLE_MASK (1 << IPIPE_ENABLE_FLAG) +#define IPIPE_DYNAMIC_MASK IPIPE_HANDLE_MASK +#define IPIPE_STICKY_MASK (1 << IPIPE_STICKY_FLAG) +#define IPIPE_SYSTEM_MASK (1 << IPIPE_SYSTEM_FLAG) +#define IPIPE_LOCK_MASK (1 << IPIPE_LOCK_FLAG) +#define IPIPE_WIRED_MASK (1 << IPIPE_WIRED_FLAG) +#define IPIPE_EXCLUSIVE_MASK (1 << IPIPE_EXCLUSIVE_FLAG) + +#define IPIPE_DEFAULT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK) +#define IPIPE_STDROOT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK|IPIPE_SYST= EM_MASK) + +#define IPIPE_EVENT_SELF 0x80000000 + +#define IPIPE_NR_CPUS NR_CPUS + +#define ipipe_current_domain ipipe_cpu_var(ipipe_percpu_domain) + +#define 
ipipe_virtual_irq_p(irq) ((irq) >=3D IPIPE_VIRQ_BASE && \ + (irq) < IPIPE_NR_IRQS) + +typedef void (*ipipe_irq_handler_t)(unsigned irq, + void *cookie); + +#define IPIPE_SAME_HANDLER ((ipipe_irq_handler_t)(-1)) + +typedef int (*ipipe_irq_ackfn_t)(unsigned irq); + +typedef int (*ipipe_event_handler_t)(unsigned event, + struct ipipe_domain *from, + void *data); +struct ipipe_domain { + + int slot; /* Slot number in percpu domain data array. */ + struct list_head p_link; /* Link in pipeline */ + ipipe_event_handler_t evhand[IPIPE_NR_EVENTS]; /* Event handlers. */ + unsigned long long evself; /* Self-monitored event bits. */ + + struct { + unsigned long control; + ipipe_irq_ackfn_t acknowledge; + ipipe_irq_handler_t handler; + void *cookie; + } ____cacheline_aligned irqs[IPIPE_NR_IRQS]; + + int priority; + void *pdd; + unsigned long flags; + unsigned domid; + const char *name; + struct mutex mutex; +}; + +#define IPIPE_HEAD_PRIORITY (-1) /* For domains always heading the pipel= ine */ + +struct ipipe_domain_attr { + + unsigned domid; /* Domain identifier -- Magic value set by caller */ + const char *name; /* Domain name -- Warning: won't be dup'ed! */ + int priority; /* Priority in interrupt pipeline */ + void (*entry) (void); /* Domain entry point */ + void *pdd; /* Per-domain (opaque) data pointer */ +}; + +#ifdef CONFIG_SMP +/* These ops must start and complete on the same CPU: care for + * migration. 
*/ +#define set_bit_safe(b, a) \ + ({ unsigned long __flags; \ + local_irq_save_hw_notrace(__flags); \ + __set_bit(b, a); \ + local_irq_restore_hw_notrace(__flags); }) +#define test_and_set_bit_safe(b, a) \ + ({ unsigned long __flags, __x; \ + local_irq_save_hw_notrace(__flags); \ + __x =3D __test_and_set_bit(b, a); \ + local_irq_restore_hw_notrace(__flags); __x; }) +#define clear_bit_safe(b, a) \ + ({ unsigned long __flags; \ + local_irq_save_hw_notrace(__flags); \ + __clear_bit(b, a); \ + local_irq_restore_hw_notrace(__flags); }) +#else +#define set_bit_safe(b, a) set_bit(b, a) +#define test_and_set_bit_safe(b, a) test_and_set_bit(b, a) +#define clear_bit_safe(b, a) clear_bit(b, a) +#endif + +#define __ipipe_irq_cookie(ipd, irq) (ipd)->irqs[irq].cookie +#define __ipipe_irq_handler(ipd, irq) (ipd)->irqs[irq].handler +#define __ipipe_cpudata_irq_hits(ipd, cpu, irq) ipipe_percpudom(ipd, irq= all, cpu)[irq] + +extern unsigned __ipipe_printk_virq; + +extern unsigned long __ipipe_virtual_irq_map; + +extern struct list_head __ipipe_pipeline; + +extern int __ipipe_event_monitors[]; + +/* Private interface */ + +void ipipe_init(void); + +#ifdef CONFIG_PROC_FS +void ipipe_init_proc(void); + +#ifdef CONFIG_IPIPE_TRACE +void __ipipe_init_tracer(void); +#else /* !CONFIG_IPIPE_TRACE */ +#define __ipipe_init_tracer() do { } while(0) +#endif /* CONFIG_IPIPE_TRACE */ + +#else /* !CONFIG_PROC_FS */ +#define ipipe_init_proc() do { } while(0) +#endif /* CONFIG_PROC_FS */ + +void __ipipe_init_stage(struct ipipe_domain *ipd); + +void __ipipe_cleanup_domain(struct ipipe_domain *ipd); + +void __ipipe_add_domain_proc(struct ipipe_domain *ipd); + +void __ipipe_remove_domain_proc(struct ipipe_domain *ipd); + +void __ipipe_flush_printk(unsigned irq, void *cookie); + +void fastcall __ipipe_walk_pipeline(struct list_head *pos); + +int fastcall __ipipe_schedule_irq(unsigned irq, struct list_head *head);= + +int fastcall __ipipe_dispatch_event(unsigned event, void *data); + +int fastcall 
__ipipe_dispatch_wired(struct ipipe_domain *head_domain, un= signed irq); + +void fastcall __ipipe_sync_stage(unsigned long syncmask); + +void fastcall __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned= irq); + +void fastcall __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsign= ed irq); + +void fastcall __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq)= ; + +void __ipipe_pin_range_globally(unsigned long start, unsigned long end);= + +/* Must be called hw IRQs off. */ +static inline void ipipe_irq_lock(unsigned irq) +{ + __ipipe_lock_irq(ipipe_current_domain, ipipe_processor_id(), irq); +} + +/* Must be called hw IRQs off. */ +static inline void ipipe_irq_unlock(unsigned irq) +{ + __ipipe_unlock_irq(ipipe_current_domain, irq); +} + +#ifndef __ipipe_sync_pipeline +#define __ipipe_sync_pipeline(syncmask) __ipipe_sync_stage(syncmask) +#endif + +#ifndef __ipipe_run_irqtail +#define __ipipe_run_irqtail() do { } while(0) +#endif + +#define __ipipe_pipeline_head_p(ipd) (&(ipd)->p_link =3D=3D __ipipe_pipe= line.next) + +/* + * Keep the following as a macro, so that client code could check for + * the support of the invariant pipeline head optimization. 
+ */ +#define __ipipe_pipeline_head() list_entry(__ipipe_pipeline.next,struct = ipipe_domain,p_link) + +#define __ipipe_event_monitored_p(ev) \ + (__ipipe_event_monitors[ev] > 0 || (ipipe_current_domain->evself & (1LL= << ev))) + +#ifdef CONFIG_SMP + +cpumask_t __ipipe_set_irq_affinity(unsigned irq, + cpumask_t cpumask); + +int fastcall __ipipe_send_ipi(unsigned ipi, + cpumask_t cpumask); + +#endif /* CONFIG_SMP */ + +#define ipipe_sigwake_notify(p) \ +do { \ + if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT= _SIGWAKE)) \ + __ipipe_dispatch_event(IPIPE_EVENT_SIGWAKE,p); \ +} while(0) + +#define ipipe_exit_notify(p) \ +do { \ + if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT= _EXIT)) \ + __ipipe_dispatch_event(IPIPE_EVENT_EXIT,p); \ +} while(0) + +#define ipipe_setsched_notify(p) \ +do { \ + if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT= _SETSCHED)) \ + __ipipe_dispatch_event(IPIPE_EVENT_SETSCHED,p); \ +} while(0) + +#define ipipe_schedule_notify(prev, next) \ +do { \ + if ((((prev)->flags|(next)->flags) & PF_EVNOTIFY) && \ + __ipipe_event_monitored_p(IPIPE_EVENT_SCHEDULE)) \ + __ipipe_dispatch_event(IPIPE_EVENT_SCHEDULE,next); \ +} while(0) + +#define ipipe_trap_notify(ex, regs) \ +({ \ + int ret =3D 0; \ + if ((test_bit(IPIPE_NOSTACK_FLAG, &ipipe_this_cpudom_var(status)) || \ + ((current)->flags & PF_EVNOTIFY)) && \ + __ipipe_event_monitored_p(ex)) \ + ret =3D __ipipe_dispatch_event(ex, regs); \ + ret; \ +}) + +static inline void ipipe_init_notify(struct task_struct *p) +{ + if (__ipipe_event_monitored_p(IPIPE_EVENT_INIT)) + __ipipe_dispatch_event(IPIPE_EVENT_INIT,p); +} + +static inline void ipipe_cleanup_notify(struct mm_struct *mm) +{ + if (__ipipe_event_monitored_p(IPIPE_EVENT_CLEANUP)) + __ipipe_dispatch_event(IPIPE_EVENT_CLEANUP,mm); +} + +/* Public interface */ + +int ipipe_register_domain(struct ipipe_domain *ipd, + struct ipipe_domain_attr *attr); + +int 
ipipe_unregister_domain(struct ipipe_domain *ipd); + +void ipipe_suspend_domain(void); + +int ipipe_virtualize_irq(struct ipipe_domain *ipd, + unsigned irq, + ipipe_irq_handler_t handler, + void *cookie, + ipipe_irq_ackfn_t acknowledge, + unsigned modemask); + +int ipipe_control_irq(unsigned irq, + unsigned clrmask, + unsigned setmask); + +unsigned ipipe_alloc_virq(void); + +int ipipe_free_virq(unsigned virq); + +int fastcall ipipe_trigger_irq(unsigned irq); + +static inline int ipipe_propagate_irq(unsigned irq) +{ + return __ipipe_schedule_irq(irq, ipipe_current_domain->p_link.next); +} + +static inline int ipipe_schedule_irq(unsigned irq) +{ + return __ipipe_schedule_irq(irq, &ipipe_current_domain->p_link); +} + +void fastcall ipipe_stall_pipeline_from(struct ipipe_domain *ipd); + +unsigned long fastcall ipipe_test_and_stall_pipeline_from(struct ipipe_d= omain *ipd); + +void fastcall ipipe_unstall_pipeline_from(struct ipipe_domain *ipd); + +unsigned long fastcall ipipe_test_and_unstall_pipeline_from(struct ipipe= _domain *ipd); + +void fastcall ipipe_restore_pipeline_from(struct ipipe_domain *ipd, + unsigned long x); + +static inline unsigned long ipipe_test_pipeline_from(struct ipipe_domain= *ipd) +{ + return test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); +} + +static inline void ipipe_stall_pipeline_head(void) +{ + local_irq_disable_hw(); + __set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(__ipipe_pipeline_head(), = status)); +} + +static inline unsigned long ipipe_test_and_stall_pipeline_head(void) +{ + local_irq_disable_hw(); + return __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(__ipipe_p= ipeline_head(), status)); +} + +void ipipe_unstall_pipeline_head(void); + +void fastcall __ipipe_restore_pipeline_head(struct ipipe_domain *head_do= main, + unsigned long x); + +static inline void ipipe_restore_pipeline_head(unsigned long x) +{ + struct ipipe_domain *head_domain =3D __ipipe_pipeline_head(); + /* On some archs, __test_and_set_bit() might 
return different + * truth value than test_bit(), so we test the exclusive OR of + * both statuses, assuming that the lowest bit is always set in + * the truth value (if this is wrong, the failed optimization will + * be caught in __ipipe_restore_pipeline_head() if + * CONFIG_DEBUG_KERNEL is set). */ + if ((x ^ test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, stat= us))) & 1) + __ipipe_restore_pipeline_head(head_domain, x); +} + +#define ipipe_unstall_pipeline() \ + ipipe_unstall_pipeline_from(ipipe_current_domain) + +#define ipipe_test_and_unstall_pipeline() \ + ipipe_test_and_unstall_pipeline_from(ipipe_current_domain) + +#define ipipe_test_pipeline() \ + ipipe_test_pipeline_from(ipipe_current_domain) + +#define ipipe_test_and_stall_pipeline() \ + ipipe_test_and_stall_pipeline_from(ipipe_current_domain) + +#define ipipe_stall_pipeline() \ + ipipe_stall_pipeline_from(ipipe_current_domain) + +#define ipipe_restore_pipeline(x) \ + ipipe_restore_pipeline_from(ipipe_current_domain, (x)) + +void ipipe_init_attr(struct ipipe_domain_attr *attr); + +int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo); + +unsigned long ipipe_critical_enter(void (*syncfn) (void)); + +void ipipe_critical_exit(unsigned long flags); + +static inline void ipipe_set_printk_sync(struct ipipe_domain *ipd) +{ + set_bit(IPIPE_SPRINTK_FLAG, &ipd->flags); +} + +static inline void ipipe_set_printk_async(struct ipipe_domain *ipd) +{ + clear_bit(IPIPE_SPRINTK_FLAG, &ipd->flags); +} + +static inline void ipipe_set_foreign_stack(struct ipipe_domain *ipd) +{ + /* Must be called hw interrupts off. */ + __set_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status)); +} + +static inline void ipipe_clear_foreign_stack(struct ipipe_domain *ipd) +{ + /* Must be called hw interrupts off. */ + __clear_bit(IPIPE_NOSTACK_FLAG, &ipipe_cpudom_var(ipd, status)); +} + +#define ipipe_safe_current() \ +({ \ + struct task_struct *p; \ + p =3D test_bit(IPIPE_NOSTACK_FLAG, \ + &ipipe_this_cpudom_var(status)) ? 
&init_task : current; \ + p; \ +}) + +ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd, + unsigned event, + ipipe_event_handler_t handler); + +cpumask_t ipipe_set_irq_affinity(unsigned irq, + cpumask_t cpumask); + +int fastcall ipipe_send_ipi(unsigned ipi, + cpumask_t cpumask); + +int ipipe_setscheduler_root(struct task_struct *p, + int policy, + int prio); + +int ipipe_reenter_root(struct task_struct *prev, + int policy, + int prio); + +int ipipe_alloc_ptdkey(void); + +int ipipe_free_ptdkey(int key); + +int fastcall ipipe_set_ptd(int key, + void *value); + +void fastcall *ipipe_get_ptd(int key); + +int ipipe_disable_ondemand_mappings(struct task_struct *tsk); + +#define local_irq_enable_hw_cond() local_irq_enable_hw() +#define local_irq_disable_hw_cond() local_irq_disable_hw() +#define local_irq_save_hw_cond(flags) local_irq_save_hw(flags) +#define local_irq_restore_hw_cond(flags) local_irq_restore_hw(flags) +#define local_irq_disable_head() ipipe_stall_pipeline_head() + +#define local_irq_enable_nohead(ipd) \ + do { \ + if (!__ipipe_pipeline_head_p(ipd)) \ + local_irq_enable_hw(); \ + } while(0) + +#define local_irq_disable_nohead(ipd) \ + do { \ + if (!__ipipe_pipeline_head_p(ipd)) \ + local_irq_disable_hw(); \ + } while(0) + +#define ipipe_root_domain_p (ipipe_current_domain =3D=3D ipipe_root_dom= ain) + +#else /* !CONFIG_IPIPE */ + +#define ipipe_init() do { } while(0) +#define ipipe_suspend_domain() do { } while(0) +#define ipipe_sigwake_notify(p) do { } while(0) +#define ipipe_setsched_notify(p) do { } while(0) +#define ipipe_init_notify(p) do { } while(0) +#define ipipe_exit_notify(p) do { } while(0) +#define ipipe_cleanup_notify(mm) do { } while(0) +#define ipipe_trap_notify(t,r) 0 +#define ipipe_init_proc() do { } while(0) +#define __ipipe_pin_range_globally(start, end) do { } while(0) + +#define local_irq_enable_hw_cond() do { } while(0) +#define local_irq_disable_hw_cond() do { } while(0) +#define local_irq_save_hw_cond(flags) do { 
(void)(flags); } while(0) +#define local_irq_restore_hw_cond(flags) do { } while(0) + +#define ipipe_irq_lock(irq) do { } while(0) +#define ipipe_irq_unlock(irq) do { } while(0) + +#define ipipe_root_domain_p 1 +#define ipipe_safe_current() current + +#define local_irq_disable_head() local_irq_disable() + +#endif /* CONFIG_IPIPE */ + +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT + +#include +#include + +static inline int ipipe_disable_context_check(int cpu) +{ + return xchg(&per_cpu(ipipe_percpu_context_check, cpu), 0); +} + +static inline void ipipe_restore_context_check(int cpu, int old_state) +{ + per_cpu(ipipe_percpu_context_check, cpu) =3D old_state; +} + +static inline void ipipe_context_check_off(void) +{ + int cpu; + for_each_online_cpu(cpu) + per_cpu(ipipe_percpu_context_check, cpu) =3D 0; +} + +#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ + +static inline int ipipe_disable_context_check(int cpu) +{ + return 0; +} + +static inline void ipipe_restore_context_check(int cpu, int old_state) {= } + +static inline void ipipe_context_check_off(void) { } + +#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ + +#endif /* !__LINUX_IPIPE_H */ Index: linux-2.6.23/include/linux/ipipe_base.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /dev/null +++ linux-2.6.23/include/linux/ipipe_base.h @@ -0,0 +1,77 @@ +/* -*- linux-c -*- + * include/linux/ipipe_base.h + * + * Copyright (C) 2002-2007 Philippe Gerum. + * 2007 Jan Kiszka. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,= + * USA; either version 2 of the License, or (at your option) any later + * version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,= USA. + */ + +#ifndef __LINUX_IPIPE_BASE_H +#define __LINUX_IPIPE_BASE_H + +#ifdef CONFIG_IPIPE + +#include +#include + +/* Number of virtual IRQs */ +#define IPIPE_NR_VIRQS BITS_PER_LONG +/* First virtual IRQ # */ +#define IPIPE_VIRQ_BASE (((IPIPE_NR_XIRQS + BITS_PER_LONG - 1) / BITS_P= ER_LONG) * BITS_PER_LONG) +/* Total number of IRQ slots */ +#define IPIPE_NR_IRQS (IPIPE_VIRQ_BASE + IPIPE_NR_VIRQS) +/* Number of indirect words needed to map the whole IRQ space. */ +#define IPIPE_IRQ_IWORDS ((IPIPE_NR_IRQS + BITS_PER_LONG - 1) / BITS_PER= _LONG) +#define IPIPE_IRQ_IMASK (BITS_PER_LONG - 1) +#define IPIPE_IRQMASK_ANY (~0L) +#define IPIPE_IRQMASK_VIRT (IPIPE_IRQMASK_ANY << (IPIPE_VIRQ_BASE / BITS= _PER_LONG)) + +/* Per-cpu pipeline status */ +#define IPIPE_STALL_FLAG 0 /* Stalls a pipeline stage -- guaranteed at b= it #0 */ +#define IPIPE_SYNC_FLAG 1 /* The interrupt syncer is running for the do= main */ +#define IPIPE_NOSTACK_FLAG 2 /* Domain currently runs on a foreign stack= */ + +#define IPIPE_STALL_MASK (1L << IPIPE_STALL_FLAG) +#define IPIPE_SYNC_MASK (1L << IPIPE_SYNC_FLAG) + +extern struct ipipe_domain ipipe_root; + +#define ipipe_root_domain (&ipipe_root) + +void __ipipe_unstall_root(void); + +void __ipipe_restore_root(unsigned long x); + +#define ipipe_preempt_disable(flags) local_irq_save_hw(flags) +#define ipipe_preempt_enable(flags) local_irq_restore_hw(flags) + +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT +void ipipe_check_context(struct ipipe_domain *border_ipd); +#else /* !CONFIG_IPIPE_DEBUG_CONTEXT */ 
+static inline void ipipe_check_context(struct ipipe_domain *border_ipd) = { } +#endif /* !CONFIG_IPIPE_DEBUG_CONTEXT */ + +#else /* !CONFIG_IPIPE */ +#define ipipe_preempt_disable(flags) do { \ + preempt_disable(); \ + (void)(flags); \ + } while (0) +#define ipipe_preempt_enable(flags) preempt_enable() +#define ipipe_check_context(ipd) do { } while(0) +#endif /* CONFIG_IPIPE */ + +#endif /* !__LINUX_IPIPE_BASE_H */ Index: linux-2.6.23/include/linux/ipipe_compat.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /dev/null +++ linux-2.6.23/include/linux/ipipe_compat.h @@ -0,0 +1,54 @@ +/* -*- linux-c -*- + * include/linux/ipipe_compat.h + * + * Copyright (C) 2007 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,= + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,= USA. + */ + +#ifndef __LINUX_IPIPE_COMPAT_H +#define __LINUX_IPIPE_COMPAT_H + +#ifdef CONFIG_IPIPE_COMPAT +/* + * OBSOLETE: defined only for backward compatibility. Will be removed + * in future releases, please update client code accordingly.
+ */ + +#ifdef CONFIG_SMP +#define ipipe_declare_cpuid int cpuid +#define ipipe_load_cpuid() do { \ + cpuid =3D ipipe_processor_id(); \ + } while(0) +#define ipipe_lock_cpu(flags) do { \ + local_irq_save_hw(flags); \ + cpuid =3D ipipe_processor_id(); \ + } while(0) +#define ipipe_unlock_cpu(flags) local_irq_restore_hw(flags) +#define ipipe_get_cpu(flags) ipipe_lock_cpu(flags) +#define ipipe_put_cpu(flags) ipipe_unlock_cpu(flags) +#else /* !CONFIG_SMP */ +#define ipipe_declare_cpuid const int cpuid =3D 0 +#define ipipe_load_cpuid() do { } while(0) +#define ipipe_lock_cpu(flags) local_irq_save_hw(flags) +#define ipipe_unlock_cpu(flags) local_irq_restore_hw(flags) +#define ipipe_get_cpu(flags) do { (void)(flags); } while(0) +#define ipipe_put_cpu(flags) do { } while(0) +#endif /* CONFIG_SMP */ + +#endif /* CONFIG_IPIPE_COMPAT */ + +#endif /* !__LINUX_IPIPE_COMPAT_H */ Index: linux-2.6.23/include/linux/ipipe_percpu.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /dev/null +++ linux-2.6.23/include/linux/ipipe_percpu.h @@ -0,0 +1,69 @@ +/* -*- linux-c -*- + * include/linux/ipipe_percpu.h + * + * Copyright (C) 2007 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modif= y + * it under the terms of the GNU General Public License as published b= y + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 0213= 9, + * USA; either version 2 of the License, or (at your option) any later= + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-130= 7, USA. + */ + +#ifndef __LINUX_IPIPE_PERCPU_H +#define __LINUX_IPIPE_PERCPU_H + +#include +#include + +struct ipipe_domain; + +struct ipipe_percpu_domain_data { + unsigned long status; /* <=3D Must be first in struct. */ + unsigned long irqpend_himask; + unsigned long irqpend_lomask[IPIPE_IRQ_IWORDS]; + unsigned long irqheld_mask[IPIPE_IRQ_IWORDS]; + unsigned long irqall[IPIPE_NR_IRQS]; + u64 evsync; +}; + +#ifdef CONFIG_SMP +#define ipipe_percpudom(ipd, var, cpu) \ + (per_cpu(ipipe_percpu_darray, cpu)[(ipd)->slot].var) +#define ipipe_cpudom_var(ipd, var) \ + (__raw_get_cpu_var(ipipe_percpu_darray)[(ipd)->slot].var) +#else +DECLARE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CO= NFIG_IPIPE_DOMAINS]); +#define ipipe_percpudom(ipd, var, cpu) \ + (per_cpu(ipipe_percpu_daddr, cpu)[(ipd)->slot]->var) +#define ipipe_cpudom_var(ipd, var) \ + (__raw_get_cpu_var(ipipe_percpu_daddr)[(ipd)->slot]->var) +#endif + +DECLARE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CON= FIG_IPIPE_DOMAINS]); + +DECLARE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain); + +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT +DECLARE_PER_CPU(int, ipipe_percpu_context_check); +#endif + +#define ipipe_percpu(var, cpu) per_cpu(var, cpu) +#define ipipe_cpu_var(var) __raw_get_cpu_var(var) + +#define ipipe_root_cpudom_var(var) \ + __raw_get_cpu_var(ipipe_percpu_darray)[0].var + +#define ipipe_this_cpudom_var(var) \ + ipipe_cpudom_var(ipipe_current_domain, var) + +#endif /* !__LINUX_IPIPE_PERCPU_H */ Index: linux-2.6.23/include/linux/ipipe_tickdev.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /dev/null +++ 
linux-2.6.23/include/linux/ipipe_tickdev.h @@ -0,0 +1,55 @@ +/* -*- linux-c -*- + * include/linux/ipipe_tickdev.h + * + * Copyright (C) 2007 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,= + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,= USA. + */ + +#ifndef __LINUX_IPIPE_TICKDEV_H +#define __LINUX_IPIPE_TICKDEV_H + +#if defined(CONFIG_IPIPE) && defined(CONFIG_GENERIC_CLOCKEVENTS) + +#include + +struct tick_device; + +struct ipipe_tick_device { + + void (*emul_set_mode)(enum clock_event_mode, + struct ipipe_tick_device *tdev); + int (*emul_set_tick)(unsigned long delta, + struct ipipe_tick_device *tdev); + void (*real_set_mode)(enum clock_event_mode mode, + struct clock_event_device *cdev); + int (*real_set_tick)(unsigned long delta, + struct clock_event_device *cdev); + struct tick_device *slave; +}; + +int ipipe_request_tickdev(const char *devname, + void (*emumode)(enum clock_event_mode mode, + struct ipipe_tick_device *tdev), + int (*emutick)(unsigned long evt, + struct ipipe_tick_device *tdev), + int cpu); + +void ipipe_release_tickdev(int cpu); + +#endif /* CONFIG_IPIPE && CONFIG_GENERIC_CLOCKEVENTS */ + +#endif /* !__LINUX_IPIPE_TICKDEV_H */ Index: linux-2.6.23/include/linux/ipipe_trace.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= 
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /dev/null +++ linux-2.6.23/include/linux/ipipe_trace.h @@ -0,0 +1,65 @@ +/* -*- linux-c -*- + * include/linux/ipipe_trace.h + * + * Copyright (C) 2005 Luotao Fu. + * 2005-2007 Jan Kiszka. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,= + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,= USA. 
+ */ + +#ifndef _LINUX_IPIPE_TRACE_H +#define _LINUX_IPIPE_TRACE_H + +#ifdef CONFIG_IPIPE + +#include + +void ipipe_trace_begin(unsigned long v); +void ipipe_trace_end(unsigned long v); +void ipipe_trace_freeze(unsigned long v); +void ipipe_trace_special(unsigned char special_id, unsigned long v); +void ipipe_trace_pid(pid_t pid, short prio); + +int ipipe_trace_max_reset(void); +int ipipe_trace_frozen_reset(void); + +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF +#define ipipe_trace_irq_entry(irq) ipipe_trace_begin(irq) +#define ipipe_trace_irq_exit(irq) ipipe_trace_end(irq) +#define ipipe_trace_irqsoff() ipipe_trace_begin(0x80000000UL) +#define ipipe_trace_irqson() ipipe_trace_end(0x80000000UL) +#else +#define ipipe_trace_irq_entry(irq) do { } while(0) +#define ipipe_trace_irq_exit(irq) do { } while(0) +#define ipipe_trace_irqsoff() do { } while(0) +#define ipipe_trace_irqson() do { } while(0) +#endif + +#endif /* CONFIG_IPIPE */ + +#ifdef CONFIG_IPIPE_TRACE_PANIC + +void ipipe_trace_panic_freeze(void); +void ipipe_trace_panic_dump(void); + +#else /* !CONFIG_IPIPE_TRACE_PANIC */ + +static inline void ipipe_trace_panic_freeze(void) { } +static inline void ipipe_trace_panic_dump(void) { } + +#endif /* !CONFIG_IPIPE_TRACE_PANIC */ + +#endif /* !_LINUX_IPIPE_TRACE_H */ Index: linux-2.6.23/include/linux/irq.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/linux/irq.h +++ linux-2.6.23/include/linux/irq.h @@ -150,6 +150,12 @@ struct irq_chip { * @name: flow handler name for /proc/interrupts output */ struct irq_desc { +#ifdef CONFIG_IPIPE + void fastcall (*ipipe_ack)(unsigned int irq, + struct irq_desc *desc); + void fastcall (*ipipe_end)(unsigned int irq, + struct irq_desc *desc); +#endif /* CONFIG_IPIPE */ irq_flow_handler_t handle_irq; struct irq_chip *chip; struct msi_desc *msi_desc; Index:
linux-2.6.23/include/linux/kernel.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/linux/kernel.h +++ linux-2.6.23/include/linux/kernel.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include =20 @@ -84,9 +85,12 @@ struct user; */ #ifdef CONFIG_PREEMPT_VOLUNTARY extern int cond_resched(void); -# define might_resched() cond_resched() +# define might_resched() do { \ + ipipe_check_context(ipipe_root_domain); \ + cond_resched(); \ + } while (0) #else -# define might_resched() do { } while (0) +# define might_resched() ipipe_check_context(ipipe_root_domain) #endif =20 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP Index: linux-2.6.23/include/linux/linkage.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/linux/linkage.h +++ linux-2.6.23/include/linux/linkage.h @@ -64,4 +64,8 @@ #define fastcall #endif =20 +#ifndef notrace +#define notrace __attribute__((no_instrument_function)) +#endif + #endif Index: linux-2.6.23/include/linux/mm.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/linux/mm.h +++ linux-2.6.23/include/linux/mm.h @@ -168,6 +168,7 @@ extern unsigned int kobjsize(const void=20 #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mma= p) */ #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" d= one on it */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ +#define VM_PINNED 0x10000000 /* Disable faults for the vma */ =20 #define VM_CAN_NONLINEAR
0x08000000 /* Has ->fault & does nonlinear page= s */ =20 Index: linux-2.6.23/include/linux/preempt.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/linux/preempt.h +++ linux-2.6.23/include/linux/preempt.h @@ -8,6 +8,7 @@ =20 #include #include +#include #include =20 #ifdef CONFIG_DEBUG_PREEMPT @@ -29,18 +30,21 @@ asmlinkage void preempt_schedule(void); =20 #define preempt_disable() \ do { \ + ipipe_check_context(ipipe_root_domain); \ inc_preempt_count(); \ barrier(); \ } while (0) =20 #define preempt_enable_no_resched() \ do { \ + ipipe_check_context(ipipe_root_domain); \ barrier(); \ dec_preempt_count(); \ } while (0) =20 #define preempt_check_resched() \ do { \ + ipipe_check_context(ipipe_root_domain); \ if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \ preempt_schedule(); \ } while (0) @@ -54,10 +58,10 @@ do { \ =20 #else =20 -#define preempt_disable() do { } while (0) -#define preempt_enable_no_resched() do { } while (0) -#define preempt_enable() do { } while (0) -#define preempt_check_resched() do { } while (0) +#define preempt_disable() ipipe_check_context(ipipe_root_domain) +#define preempt_enable_no_resched() ipipe_check_context(ipipe_root_domai= n) +#define preempt_enable() ipipe_check_context(ipipe_root_domain) +#define preempt_check_resched() ipipe_check_context(ipipe_root_domain) =20 #endif =20 Index: linux-2.6.23/include/linux/sched.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/linux/sched.h +++ linux-2.6.23/include/linux/sched.h @@ -57,6 +57,7 @@ struct sched_param { #include #include #include +#include =20 #include #include @@ -175,6 +176,13 @@ print_cfs_rq(struct seq_file 
*m, int cpu /* in tsk->state again */ #define TASK_NONINTERACTIVE 64 #define TASK_DEAD 128 +#ifdef CONFIG_IPIPE +#define TASK_ATOMICSWITCH 512 +#define TASK_NOWAKEUP 1024 +#else /* !CONFIG_IPIPE */ +#define TASK_ATOMICSWITCH 0 +#define TASK_NOWAKEUP 0 +#endif /* CONFIG_IPIPE */ =20 #define __set_task_state(tsk, state_value) \ do { (tsk)->state =3D (state_value); } while (0) @@ -1184,6 +1192,9 @@ struct task_struct { =20 atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; +#ifdef CONFIG_IPIPE + void *ptd[IPIPE_ROOT_NPTDKEYS]; +#endif =20 /* * cache last used pipe for splice @@ -1334,6 +1345,11 @@ static inline void put_task_struct(struc #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tes= ter */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as fre= ezeable */ +#ifdef CONFIG_IPIPE +#define PF_EVNOTIFY 0x80000000 /* Notify other domains about internal ev= ents */ +#else +#define PF_EVNOTIFY 0 +#endif /* CONFIG_IPIPE */ =20 /* * Only the _current_ task can read/write to tsk->flags, but other Index: linux-2.6.23/include/linux/spinlock.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/linux/spinlock.h +++ linux-2.6.23/include/linux/spinlock.h @@ -172,7 +172,90 @@ do { \ #define read_trylock(lock) __cond_lock(lock, _read_trylock(lock)) #define write_trylock(lock) __cond_lock(lock, _write_trylock(lock)) =20 -#define spin_lock(lock) _spin_lock(lock) +#undef TYPE_EQUAL +#define TYPE_EQUAL(lock, type) \ + __builtin_types_compatible_p(typeof(lock), type *) + +#define PICK_SPINOP(op, lock) \ +do { \ + if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) \ + __raw_spin##op(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \ + else if (TYPE_EQUAL(lock, spinlock_t)) \ + 
_spin##op((spinlock_t *)(lock)); \ +} while (0) + +#define PICK_SPINOP_RAW(op, lock) \ +do { \ + if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) \ + __raw_spin##op(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \ + else if (TYPE_EQUAL(lock, spinlock_t)) \ + __raw_spin##op(&((spinlock_t *)(lock))->raw_lock); \ +} while (0) + +#define PICK_SPINLOCK_IRQ(lock) \ +do { \ + if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \ + __ipipe_spin_lock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \ + } else if (TYPE_EQUAL(lock, spinlock_t)) \ + _spin_lock_irq((spinlock_t *)(lock)); \ +} while (0) + +#define PICK_SPINUNLOCK_IRQ(lock) \ +do { \ + if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \ + __ipipe_spin_unlock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); = \ + } else if (TYPE_EQUAL(lock, spinlock_t)) \ + _spin_unlock_irq((spinlock_t *)(lock)); \ +} while (0) + +#define PICK_SPINLOCK_IRQ_RAW(lock) \ +do { \ + if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \ + __ipipe_spin_lock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); \ + } else if (TYPE_EQUAL(lock, spinlock_t)) { \ + local_irq_disable(); \ + __raw_spin_lock(&((spinlock_t *)(lock))->raw_lock); } \ +} while (0) + +#define PICK_SPINUNLOCK_IRQ_RAW(lock) \ +do { \ + if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \ + __ipipe_spin_unlock_irq(&((__ipipe_spinlock_t *)(lock))->__raw_lock); = \ + } else if (TYPE_EQUAL(lock, spinlock_t)) { \ + __raw_spin_unlock(&((spinlock_t *)(lock))->raw_lock); \ + local_irq_enable(); } \ +} while (0) + +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +extern int __bad_spinlock_type(void); + +#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ +do { \ + if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \ + (flags) =3D __ipipe_spin_lock_irqsave(&((__ipipe_spinlock_t *)(lock))-= >__raw_lock); \ + } else if (TYPE_EQUAL(lock, spinlock_t)) \ + flags =3D _spin_lock_irqsave((spinlock_t *)(lock)); \ + else __bad_spinlock_type(); \ +} while (0) +#else +#define PICK_SPINLOCK_IRQSAVE(lock, flags) \ +do { \ + if
(TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \ + (flags) =3D __ipipe_spin_lock_irqsave(&((__ipipe_spinlock_t *)(lock))-= >__raw_lock); \ + } else if (TYPE_EQUAL(lock, spinlock_t)) \ + _spin_lock_irqsave((spinlock_t *)(lock), flags); \ +} while (0) +#endif + +#define PICK_SPINUNLOCK_IRQRESTORE(lock, flags) \ + do { \ + if (TYPE_EQUAL((lock), __ipipe_spinlock_t)) { \ + __ipipe_spin_unlock_irqrestore(&((__ipipe_spinlock_t *)(lock))->__raw_= lock, flags); \ + } else if (TYPE_EQUAL(lock, spinlock_t)) \ + _spin_unlock_irqrestore((spinlock_t *)(lock), flags); \ +} while (0) + +#define spin_lock(lock) PICK_SPINOP(_lock, lock) =20 #ifdef CONFIG_DEBUG_LOCK_ALLOC # define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subcla= ss) @@ -185,7 +268,7 @@ do { \ =20 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) =20 -#define spin_lock_irqsave(lock, flags) flags =3D _spin_lock_irqsave(lock= ) +#define spin_lock_irqsave(lock, flags) PICK_SPINLOCK_IRQSAVE(lock, flags= ) #define read_lock_irqsave(lock, flags) flags =3D _read_lock_irqsave(lock= ) #define write_lock_irqsave(lock, flags) flags =3D _write_lock_irqsave(lo= ck) =20 @@ -199,7 +282,7 @@ do { \ =20 #else =20 -#define spin_lock_irqsave(lock, flags) _spin_lock_irqsave(lock, flags) +#define spin_lock_irqsave(lock, flags) PICK_SPINLOCK_IRQSAVE(lock, flags= ) #define read_lock_irqsave(lock, flags) _read_lock_irqsave(lock, flags) #define write_lock_irqsave(lock, flags) _write_lock_irqsave(lock, flags)= #define spin_lock_irqsave_nested(lock, flags, subclass) \ @@ -207,7 +290,7 @@ do { \ =20 #endif =20 -#define spin_lock_irq(lock) _spin_lock_irq(lock) +#define spin_lock_irq(lock) PICK_SPINLOCK_IRQ(lock) #define spin_lock_bh(lock) _spin_lock_bh(lock) =20 #define read_lock_irq(lock) _read_lock_irq(lock) @@ -221,32 +304,40 @@ do { \ */ #if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \ !defined(CONFIG_SMP) -# define spin_unlock(lock) _spin_unlock(lock) +#define spin_unlock(lock) PICK_SPINOP(_unlock, 
lock) # define read_unlock(lock) _read_unlock(lock) # define write_unlock(lock) _write_unlock(lock) -# define spin_unlock_irq(lock) _spin_unlock_irq(lock) -# define read_unlock_irq(lock) _read_unlock_irq(lock) -# define write_unlock_irq(lock) _write_unlock_irq(lock) -#else -# define spin_unlock(lock) \ - do {__raw_spin_unlock(&(lock)->raw_lock); __release(lock); } while (= 0) -# define read_unlock(lock) \ - do {__raw_read_unlock(&(lock)->raw_lock); __release(lock); } while (= 0) -# define write_unlock(lock) \ - do {__raw_write_unlock(&(lock)->raw_lock); __release(lock); } while = (0) -# define spin_unlock_irq(lock) \ +# define spin_unlock_irq(lock) PICK_SPINUNLOCK_IRQ(lock) +# define read_unlock_irq(lock) _read_unlock_irq(lock) +# define write_unlock_irq(lock) _write_unlock_irq(lock) +#else +# define spin_unlock(lock) \ do { \ - __raw_spin_unlock(&(lock)->raw_lock); \ + PICK_SPINOP_RAW(_unlock, lock); \ + __release(lock); \ +} while(0) +# define read_unlock(lock) \ +do { \ + __raw_read_unlock(&(lock)->raw_lock); \ + __release(lock); \ +} while (0) +# define write_unlock(lock) \ +do { \ + __raw_write_unlock(&(lock)->raw_lock); \ __release(lock); \ - local_irq_enable(); \ } while (0) -# define read_unlock_irq(lock) \ +# define spin_unlock_irq(lock) \ +do { \ + PICK_SPINUNLOCK_IRQ_RAW(lock); \ + __release(lock); \ +} while(0) +# define read_unlock_irq(lock) \ do { \ __raw_read_unlock(&(lock)->raw_lock); \ __release(lock); \ local_irq_enable(); \ } while (0) -# define write_unlock_irq(lock) \ +# define write_unlock_irq(lock) \ do { \ __raw_write_unlock(&(lock)->raw_lock); \ __release(lock); \ @@ -254,8 +345,8 @@ do { \ } while (0) #endif =20 -#define spin_unlock_irqrestore(lock, flags) \ - _spin_unlock_irqrestore(lock, flags) +#define spin_unlock_irqrestore(lock, flags) \ + PICK_SPINUNLOCK_IRQRESTORE(lock, flags) #define spin_unlock_bh(lock) _spin_unlock_bh(lock) =20 #define read_unlock_irqrestore(lock, flags) \ @@ -346,4 +437,29 @@ extern int 
_atomic_dec_and_lock(atomic_t */ #define spin_can_lock(lock) (!spin_is_locked(lock)) =20 +#ifdef CONFIG_IPIPE +void fastcall __ipipe_spin_lock_irq(raw_spinlock_t *lock); +void fastcall __ipipe_spin_unlock_irq(raw_spinlock_t *lock); +unsigned long fastcall __ipipe_spin_lock_irqsave(raw_spinlock_t *lock); +void fastcall __ipipe_spin_unlock_irqrestore(raw_spinlock_t *lock, + unsigned long x); +void fastcall __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock); +void fastcall __ipipe_spin_unlock_irqcomplete(unsigned long x); +#define spin_lock_irqsave_cond(lock, flags) \ + spin_lock_irqsave(lock, flags) +#define spin_unlock_irqrestore_cond(lock, flags) \ + spin_unlock_irqrestore(lock, flags) +#else +#define spin_lock_irqsave_cond(lock, flags) \ + do { (void)(flags); spin_lock(lock); } while(0) +#define spin_unlock_irqrestore_cond(lock, flags) \ + spin_unlock(lock) +#define __ipipe_spin_lock_irq(lock) do { } while(0) +#define __ipipe_spin_unlock_irq(lock) do { } while(0) +#define __ipipe_spin_lock_irqsave(lock) 0 +#define __ipipe_spin_unlock_irqrestore(lock, x) do { (void)(x); } while(= 0) +#define __ipipe_spin_unlock_irqbegin(lock) do { } while(0) +#define __ipipe_spin_unlock_irqcomplete(x) do { (void)(x); } while(0) +#endif + #endif /* __LINUX_SPINLOCK_H */ Index: linux-2.6.23/include/linux/spinlock_types.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/include/linux/spinlock_types.h +++ linux-2.6.23/include/linux/spinlock_types.h @@ -31,6 +31,10 @@ typedef struct { #endif } spinlock_t; =20 +typedef struct { + raw_spinlock_t __raw_lock; +} __ipipe_spinlock_t; + #define SPINLOCK_MAGIC 0xdead4ead =20 typedef struct { @@ -92,9 +96,19 @@ typedef struct { * __SPIN_LOCK_UNLOCKED()/__RW_LOCK_UNLOCKED() as appropriate. 
*/ #define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(old_style_spin_init) +#define IPIPE_SPIN_LOCK_UNLOCKED \ + (__ipipe_spinlock_t) { .__raw_lock =3D __RAW_SPIN_LOCK_UNLOCKED } #define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init) =20 #define DEFINE_SPINLOCK(x) spinlock_t x =3D __SPIN_LOCK_UNLOCKED(x) #define DEFINE_RWLOCK(x) rwlock_t x =3D __RW_LOCK_UNLOCKED(x) =20 +#ifdef CONFIG_IPIPE +# define ipipe_spinlock_t __ipipe_spinlock_t +# define IPIPE_DEFINE_SPINLOCK(x) ipipe_spinlock_t x =3D IPIPE_SPIN_LOCK= _UNLOCKED +#else +# define ipipe_spinlock_t spinlock_t +# define IPIPE_DEFINE_SPINLOCK(x) DEFINE_SPINLOCK(x) +#endif + #endif /* __LINUX_SPINLOCK_TYPES_H */ Index: linux-2.6.23/init/Kconfig =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/init/Kconfig +++ linux-2.6.23/init/Kconfig @@ -64,6 +64,7 @@ config INIT_ENV_ARG_LIMIT =20 config LOCALVERSION string "Local version - append to kernel release" + default "-ipipe" help Append an extra string to the end of your kernel version. This will show up when you type uname, for example. @@ -668,3 +669,36 @@ config STOP_MACHINE Need stop_machine() primitive. =20 source "block/Kconfig" + +menu "Real-time sub-system" + +comment "WARNING! You enabled APM, CPU Frequency scaling or ACPI 'proces= sor'" + depends on APM || CPU_FREQ || ACPI_PROCESSOR +comment "option. These options are known to cause troubles with Xenomai.= " + depends on APM || CPU_FREQ || ACPI_PROCESSOR + +comment "NOTE: Xenomai conflicts with PC speaker support." + depends on !X86_TSC && X86 && INPUT_PCSPKR +comment "(menu Device Drivers/Input device support/Miscellaneous devices= )" + depends on !X86_TSC && X86 && INPUT_PCSPKR + +comment "NOTE: Xenomai conflicts with HPET Timer support." 
+ depends on !X86_LOCAL_APIC && X86 && HPET_TIMER +comment "(menu Processor type and features/HPET Timer Support)" + depends on !X86_LOCAL_APIC && X86 && HPET_TIMER + +config XENOMAI + depends on ((X86_TSC || !X86 || !INPUT_PCSPKR) && (!HPET_TIMER || !X86 = || X86_LOCAL_APIC)) + bool "Xenomai" + default y + select IPIPE + + help + Xenomai is a real-time extension to the Linux kernel. Note + that Xenomai relies on Adeos interrupt pipeline (CONFIG_IPIPE + option) to be enabled, so enabling this option selects the + CONFIG_IPIPE option. + +source "arch/i386/xenomai/Kconfig" + +endmenu Index: linux-2.6.23/init/main.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/init/main.c +++ linux-2.6.23/init/main.c @@ -524,7 +524,7 @@ asmlinkage void __init start_kernel(void unwind_init(); lockdep_init(); =20 - local_irq_disable(); + local_irq_disable_hw(); early_boot_irqs_off(); early_init_irq_lock_class(); =20 @@ -577,6 +577,11 @@ asmlinkage void __init start_kernel(void softirq_init(); timekeeping_init(); time_init(); + /* + * We need to wait for the interrupt and time subsystems to be + * initialized before enabling the pipeline. 
+ */ + ipipe_init(); profile_init(); if (!irqs_disabled()) printk("start_kernel(): bug: interrupts were enabled early\n"); @@ -737,6 +742,7 @@ static void __init do_basic_setup(void) usermodehelper_init(); driver_init(); init_irq_proc(); + ipipe_init_proc(); do_initcalls(); } =20 Index: linux-2.6.23/kernel/Makefile =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/kernel/Makefile +++ linux-2.6.23/kernel/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) +=3D irq/ obj-$(CONFIG_SECCOMP) +=3D seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) +=3D rcutorture.o obj-$(CONFIG_RELAY) +=3D relay.o +obj-$(CONFIG_IPIPE) +=3D ipipe/ obj-$(CONFIG_SYSCTL) +=3D utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) +=3D delayacct.o obj-$(CONFIG_TASKSTATS) +=3D taskstats.o tsacct.o @@ -74,3 +75,5 @@ quiet_cmd_ikconfiggz =3D IKCFG $@ targets +=3D config_data.h $(obj)/config_data.h: $(obj)/config_data.gz FORCE $(call if_changed,ikconfiggz) + +obj-$(CONFIG_XENOMAI) +=3D xenomai/ Index: linux-2.6.23/kernel/exit.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/kernel/exit.c +++ linux-2.6.23/kernel/exit.c @@ -973,6 +973,7 @@ fastcall NORET_TYPE void do_exit(long co =20 if (group_dead) acct_process(); + ipipe_exit_notify(tsk); exit_sem(tsk); __exit_files(tsk); __exit_fs(tsk); Index: linux-2.6.23/kernel/fork.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/kernel/fork.c +++ linux-2.6.23/kernel/fork.c @@ -392,6 +392,7 @@ void mmput(struct mm_struct *mm) if 
(atomic_dec_and_test(&mm->mm_users)) { exit_aio(mm); exit_mmap(mm); + ipipe_cleanup_notify(mm); if (!list_empty(&mm->mmlist)) { spin_lock(&mmlist_lock); list_del(&mm->mmlist); @@ -925,7 +926,7 @@ static inline void copy_flags(unsigned l { unsigned long new_flags =3D p->flags; =20 - new_flags &=3D ~PF_SUPERPRIV; + new_flags &=3D ~(PF_SUPERPRIV | PF_EVNOTIFY); new_flags |=3D PF_FORKNOEXEC; if (!(clone_flags & CLONE_PTRACE)) p->ptrace =3D 0; @@ -1274,6 +1275,14 @@ static struct task_struct *copy_process( spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); +#ifdef CONFIG_IPIPE + { + int k; + + for (k =3D 0; k < IPIPE_ROOT_NPTDKEYS; k++) + p->ptd[k] =3D NULL; + } +#endif /* CONFIG_IPIPE */ return p; =20 bad_fork_cleanup_namespaces: Index: linux-2.6.23/kernel/ipipe/Kconfig =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /dev/null +++ linux-2.6.23/kernel/ipipe/Kconfig @@ -0,0 +1,25 @@ +config IPIPE + bool "Interrupt pipeline" + default y + ---help--- + Activate this option if you want the interrupt pipeline to be + compiled in. + +config IPIPE_DOMAINS + int "Max domains" + depends on IPIPE + default 4 + ---help--- + The maximum number of I-pipe domains to run concurrently. + +config IPIPE_COMPAT + bool "Maintain code compatibility with older releases" + depends on IPIPE + default y + ---help--- + Activate this option if you want the compatibility code to be + defined, so that older I-pipe clients may use obsolete + constructs. WARNING: obsolete code will be eventually + deprecated in future I-pipe releases, and removed from the + compatibility support as time passes. Please fix I-pipe + clients to get rid of such uses as soon as possible. 
Index: linux-2.6.23/kernel/ipipe/Kconfig.debug =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /dev/null +++ linux-2.6.23/kernel/ipipe/Kconfig.debug @@ -0,0 +1,88 @@ +config IPIPE_DEBUG + bool "I-pipe debugging" + depends on IPIPE + +config IPIPE_DEBUG_CONTEXT + bool "Check for illicit cross-domain calls" + depends on IPIPE_DEBUG + default y + ---help--- + Enable this feature to arm checkpoints in the kernel that + verify the correct invocation context. On entry of critical + Linux services a warning is issued if the caller is not + running over the root domain. + +config IPIPE_TRACE + bool "Latency tracing" + depends on IPIPE_DEBUG + select FRAME_POINTER + select KALLSYMS + select PROC_FS + ---help--- + Activate this option if you want to use per-function tracing of + the kernel. The tracer will collect data via instrumentation + features like the one below or with the help of explicit calls + of ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the + in-kernel tracing API. The collected data and runtime control + is available via /proc/ipipe/trace/*. + +if IPIPE_TRACE + +config IPIPE_TRACE_ENABLE + bool "Enable tracing on boot" + default y + ---help--- + Disable this option if you want to arm the tracer after booting + manually ("echo 1 > /proc/ipipe/trace/enable"). This can reduce + boot time on slow embedded devices due to the tracer overhead. + +config IPIPE_TRACE_MCOUNT + bool "Instrument function entries" + default y + ---help--- + When enabled, records every kernel function entry in the tracer + log. While this slows down the system noticeably, it provides + the highest level of information about the flow of events. + However, it can be switched off in order to record only explicit + I-pipe trace points. 
+ +config IPIPE_TRACE_IRQSOFF + bool "Trace IRQs-off times" + default y + ---help--- + Activate this option if I-pipe shall trace the longest path + with hard-IRQs switched off. + +config IPIPE_TRACE_SHIFT + int "Depth of trace log (14 =3D> 16Kpoints, 15 =3D> 32Kpoints)" + range 10 18 + default 14 + ---help--- + The number of trace points to hold tracing data for each + trace path, as a power of 2. + +config IPIPE_TRACE_VMALLOC + bool "Use vmalloc'ed trace buffer" + default y if EMBEDDED + ---help--- + Instead of reserving static kernel data, the required buffer + is allocated via vmalloc during boot-up when this option is + enabled. This can help to start systems that are low on memory, + but it slightly degrades overall performance. Try this option + when a traced kernel hangs unexpectedly at boot time. + +config IPIPE_TRACE_PANIC + bool "Enable panic back traces" + default y + ---help--- + Provides services to freeze and dump a back trace on panic + situations. This is used on IPIPE_DEBUG_CONTEXT exceptions + as well as ordinary kernel oopses. You can control the number + of printed back trace points via /proc/ipipe/trace. 
+ +config IPIPE_TRACE_ENABLE_VALUE + int + default 0 if !IPIPE_TRACE_ENABLE + default 1 if IPIPE_TRACE_ENABLE + +endif Index: linux-2.6.23/kernel/ipipe/Makefile =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /dev/null +++ linux-2.6.23/kernel/ipipe/Makefile @@ -0,0 +1,3 @@ + +obj-$(CONFIG_IPIPE) +=3D core.o +obj-$(CONFIG_IPIPE_TRACE) +=3D tracer.o Index: linux-2.6.23/kernel/ipipe/core.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /dev/null +++ linux-2.6.23/kernel/ipipe/core.c @@ -0,0 +1,1623 @@ +/* -*- linux-c -*- + * linux/kernel/ipipe/core.c + * + * Copyright (C) 2002-2005 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,= + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,= USA. + * + * Architecture-independent I-PIPE core support. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PROC_FS +#include +#include +#endif /* CONFIG_PROC_FS */ +#include +#include + +static int __ipipe_ptd_key_count; + +static unsigned long __ipipe_ptd_key_map; + +static unsigned long __ipipe_domain_slot_map; + +struct ipipe_domain ipipe_root; + +#ifndef CONFIG_SMP +/* + * Create an alias to the unique root status, so that arch-dep code + * may get simple and easy access to this percpu variable. We also + * create an array of pointers to the percpu domain data; this tends + * to produce a better code when reaching non-root domains. We make + * sure that the early boot code would be able to dereference the + * pointer to the root domain data safely by statically initializing + * its value (local_irq*() routines depend on this). + */ +#if __GNUC__ >=3D 4 +extern unsigned long __ipipe_root_status +__attribute__((alias(__stringify(__raw_get_cpu_var(ipipe_percpu_darray))= ))); +EXPORT_SYMBOL(__ipipe_root_status); +#else /* __GNUC__ < 4 */ +/* + * Work around a GCC 3.x issue making alias symbols unusable as + * constant initializers. + */ +unsigned long *const __ipipe_root_status_addr =3D &__raw_get_cpu_var(ipi= pe_percpu_darray); +EXPORT_SYMBOL(__ipipe_root_status_addr); +#endif /* __GNUC__ < 4 */ + +DEFINE_PER_CPU(struct ipipe_percpu_domain_data *, ipipe_percpu_daddr[CON= FIG_IPIPE_DOMAINS]) =3D +{ [0] =3D (struct ipipe_percpu_domain_data *)&__raw_get_cpu_var(ipipe_pe= rcpu_darray) }; +EXPORT_PER_CPU_SYMBOL(ipipe_percpu_daddr); +#endif /* !CONFIG_SMP */ + +DEFINE_PER_CPU(struct ipipe_percpu_domain_data, ipipe_percpu_darray[CONF= IG_IPIPE_DOMAINS]) =3D +{ [0] =3D { .status =3D IPIPE_STALL_MASK } }; /* Root domain stalled on = each CPU at startup. 
*/ + +DEFINE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain) =3D { &ipipe_= root }; + +static IPIPE_DEFINE_SPINLOCK(__ipipe_pipelock); + +LIST_HEAD(__ipipe_pipeline); + +unsigned long __ipipe_virtual_irq_map; + +#ifdef CONFIG_PRINTK +unsigned __ipipe_printk_virq; +#endif /* CONFIG_PRINTK */ + +int __ipipe_event_monitors[IPIPE_NR_EVENTS]; + +#ifdef CONFIG_GENERIC_CLOCKEVENTS + +DECLARE_PER_CPU(struct tick_device, tick_cpu_device); + +static DEFINE_PER_CPU(struct ipipe_tick_device, ipipe_tick_cpu_device); + +static void __ipipe_set_tick_mode(enum clock_event_mode mode, + struct clock_event_device *cdev) +{ + struct ipipe_tick_device *itd; + itd =3D &per_cpu(ipipe_tick_cpu_device, smp_processor_id()); + itd->emul_set_mode(mode, itd); +} + +static int __ipipe_set_next_tick(unsigned long evt, + struct clock_event_device *cdev) +{ + uint64_t delta_ns =3D (uint64_t)cdev->delta; + struct ipipe_tick_device *itd; + + if (delta_ns > ULONG_MAX) + delta_ns =3D ULONG_MAX; + + itd =3D &per_cpu(ipipe_tick_cpu_device, smp_processor_id()); + return itd->emul_set_tick((unsigned long)delta_ns, itd); +} + +int ipipe_request_tickdev(const char *devname, + void (*emumode)(enum clock_event_mode mode, + struct ipipe_tick_device *tdev), + int (*emutick)(unsigned long delta, + struct ipipe_tick_device *tdev), + int cpu) +{ + struct ipipe_tick_device *itd; + struct tick_device *slave; + unsigned long flags; + int status; + + flags =3D ipipe_critical_enter(NULL); + + itd =3D &per_cpu(ipipe_tick_cpu_device, cpu); + + if (itd->slave !=3D NULL) { + status =3D -EBUSY; + goto out; + } + + slave =3D &per_cpu(tick_cpu_device, cpu); + + if (strcmp(slave->evtdev->name, devname)) { + /* + * No conflict so far with the current tick device, + * check whether the requested device is sane and has + * been blessed by the kernel. + */ + status =3D __ipipe_check_tickdev(devname) ? 
+ CLOCK_EVT_MODE_UNUSED : CLOCK_EVT_MODE_SHUTDOWN; + goto out; + } + + /* + * Our caller asks for using the same clock event device for + * ticking as we do, let's create a tick emulation device to + * interpose on the set_next_event() method, so that we may + * both manage the device in oneshot mode. Only the tick + * emulation code will actually program the clockchip hardware + * for the next shot, though. + * + * CAUTION: we still have to grab the tick device even when it + * currently runs in periodic mode, since the kernel may switch + * to oneshot dynamically (highres/no_hz tick mode). + */ + + itd->slave =3D slave; + itd->emul_set_mode =3D emumode; + itd->emul_set_tick =3D emutick; + itd->real_set_mode =3D slave->evtdev->set_mode; + itd->real_set_tick =3D slave->evtdev->set_next_event; + slave->evtdev->set_mode =3D __ipipe_set_tick_mode; + slave->evtdev->set_next_event =3D __ipipe_set_next_tick; + status =3D slave->evtdev->mode; +out: + ipipe_critical_exit(flags); + + return status; +} + +void ipipe_release_tickdev(int cpu) +{ + struct ipipe_tick_device *itd; + struct tick_device *slave; + unsigned long flags; + + flags =3D ipipe_critical_enter(NULL); + + itd =3D &per_cpu(ipipe_tick_cpu_device, cpu); + + if (itd->slave !=3D NULL) { + slave =3D &per_cpu(tick_cpu_device, cpu); + slave->evtdev->set_mode =3D itd->real_set_mode; + slave->evtdev->set_next_event =3D itd->real_set_tick; + itd->slave =3D NULL; + } + + ipipe_critical_exit(flags); +} + +#endif /* CONFIG_GENERIC_CLOCKEVENTS */ + +/* + * ipipe_init() -- Initialization routine of the IPIPE layer. Called + * by the host kernel early during the boot procedure. + */ +void __init ipipe_init(void) +{ + struct ipipe_domain *ipd =3D &ipipe_root; + + __ipipe_check_platform(); /* Do platform dependent checks first. */ + + /* + * A lightweight registration code for the root domain. We are + * running on the boot CPU, hw interrupts are off, and + * secondary CPUs are still lost in space. 
+ */ + + /* Reserve percpu data slot #0 for the root domain. */ + ipd->slot =3D 0; + set_bit(0, &__ipipe_domain_slot_map); + + ipd->name =3D "Linux"; + ipd->domid =3D IPIPE_ROOT_ID; + ipd->priority =3D IPIPE_ROOT_PRIO; + + __ipipe_init_stage(ipd); + + INIT_LIST_HEAD(&ipd->p_link); + list_add_tail(&ipd->p_link, &__ipipe_pipeline); + + __ipipe_init_platform(); + +#ifdef CONFIG_PRINTK + __ipipe_printk_virq =3D ipipe_alloc_virq(); /* Cannot fail here. */ + ipd->irqs[__ipipe_printk_virq].handler =3D &__ipipe_flush_printk; + ipd->irqs[__ipipe_printk_virq].cookie =3D NULL; + ipd->irqs[__ipipe_printk_virq].acknowledge =3D NULL; + ipd->irqs[__ipipe_printk_virq].control =3D IPIPE_HANDLE_MASK; +#endif /* CONFIG_PRINTK */ + + __ipipe_enable_pipeline(); + + printk(KERN_INFO "I-pipe %s: pipeline enabled.\n", + IPIPE_VERSION_STRING); +} + +void __ipipe_init_stage(struct ipipe_domain *ipd) +{ + int cpu, n; + + for_each_online_cpu(cpu) { + + ipipe_percpudom(ipd, irqpend_himask, cpu) =3D 0; + + for (n =3D 0; n < IPIPE_IRQ_IWORDS; n++) { + ipipe_percpudom(ipd, irqpend_lomask, cpu)[n] =3D 0; + ipipe_percpudom(ipd, irqheld_mask, cpu)[n] =3D 0; + } + + for (n =3D 0; n < IPIPE_NR_IRQS; n++) + ipipe_percpudom(ipd, irqall, cpu)[n] =3D 0; + + ipipe_percpudom(ipd, evsync, cpu) =3D 0; + } + + for (n =3D 0; n < IPIPE_NR_IRQS; n++) { + ipd->irqs[n].acknowledge =3D NULL; + ipd->irqs[n].handler =3D NULL; + ipd->irqs[n].control =3D IPIPE_PASS_MASK; /* Pass but don't handle */ + } + + for (n =3D 0; n < IPIPE_NR_EVENTS; n++) + ipd->evhand[n] =3D NULL; + + ipd->evself =3D 0LL; + mutex_init(&ipd->mutex); + + __ipipe_hook_critical_ipi(ipd); +} + +void __ipipe_cleanup_domain(struct ipipe_domain *ipd) +{ + ipipe_unstall_pipeline_from(ipd); + +#ifdef CONFIG_SMP + { + int cpu; + + for_each_online_cpu(cpu) { + while (ipipe_percpudom(ipd, irqpend_himask, cpu) !=3D 0) + cpu_relax(); + } + } +#else + __raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] =3D NULL; +#endif + + clear_bit(ipd->slot, 
&__ipipe_domain_slot_map); +} + +void __ipipe_unstall_root(void) +{ + BUG_ON(!ipipe_root_domain_p); + + local_irq_disable_hw(); + + __clear_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)); + + if (unlikely(ipipe_root_cpudom_var(irqpend_himask) !=3D 0)) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + + local_irq_enable_hw(); +} + +void __ipipe_restore_root(unsigned long x) +{ + BUG_ON(!ipipe_root_domain_p); + + if (x) + __ipipe_stall_root(); + else + __ipipe_unstall_root(); +} + +void fastcall ipipe_stall_pipeline_from(struct ipipe_domain *ipd) +{ + set_bit_safe(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); + + if (__ipipe_pipeline_head_p(ipd)) + local_irq_disable_hw(); +} + +unsigned long fastcall ipipe_test_and_stall_pipeline_from(struct ipipe_d= omain *ipd) +{ + unsigned long x; + + x =3D test_and_set_bit_safe(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, st= atus)); + + if (__ipipe_pipeline_head_p(ipd)) + local_irq_disable_hw(); + + return x; +} + +/* + * ipipe_unstall_pipeline_from() -- Unstall the pipeline and + * synchronize pending interrupts for a given domain. See + * __ipipe_walk_pipeline() for more information. 
+ */ +void fastcall ipipe_unstall_pipeline_from(struct ipipe_domain *ipd) +{ + struct list_head *pos; + unsigned long flags; + + local_irq_save_hw(flags); + + __clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); + + if (ipd =3D=3D ipipe_current_domain) + pos =3D &ipd->p_link; + else + pos =3D __ipipe_pipeline.next; + + __ipipe_walk_pipeline(pos); + + if (likely(__ipipe_pipeline_head_p(ipd))) + local_irq_enable_hw(); + else + local_irq_restore_hw(flags); +} + +unsigned long fastcall ipipe_test_and_unstall_pipeline_from(struct ipipe= _domain *ipd) +{ + unsigned long x; + + x =3D test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(ipd, status)); + ipipe_unstall_pipeline_from(ipd); + + return x; +} + +void fastcall ipipe_restore_pipeline_from(struct ipipe_domain *ipd, + unsigned long x) +{ + if (x) + ipipe_stall_pipeline_from(ipd); + else + ipipe_unstall_pipeline_from(ipd); +} + +void ipipe_unstall_pipeline_head(void) +{ + struct ipipe_domain *head_domain; + + local_irq_disable_hw(); + + head_domain =3D __ipipe_pipeline_head(); + __clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status)); + + if (unlikely(ipipe_cpudom_var(head_domain, irqpend_himask) !=3D 0)) { + if (likely(head_domain =3D=3D ipipe_current_domain)) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + else + __ipipe_walk_pipeline(&head_domain->p_link); + } + + local_irq_enable_hw(); +} + +void fastcall __ipipe_restore_pipeline_head(struct ipipe_domain *head_do= main, unsigned long x) +{ + local_irq_disable_hw(); + + if (x) { +#ifdef CONFIG_DEBUG_KERNEL + static int warned; + if (!warned && test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(he= ad_domain, status))) { + /* + * Already stalled albeit ipipe_restore_pipeline_head() + * should have detected it? Send a warning once. 
+ */ + warned =3D 1; + printk(KERN_WARNING + "I-pipe: ipipe_restore_pipeline_head() optimization failed.\n"); + dump_stack(); + } +#else /* !CONFIG_DEBUG_KERNEL */ + set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status)); +#endif /* CONFIG_DEBUG_KERNEL */ + } + else { + __clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status));= + if (unlikely(ipipe_cpudom_var(head_domain, irqpend_himask) !=3D 0)) { + if (likely(head_domain =3D=3D ipipe_current_domain)) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + else + __ipipe_walk_pipeline(&head_domain->p_link); + } + local_irq_enable_hw(); + } +} + +void fastcall __ipipe_spin_lock_irq(raw_spinlock_t *lock) +{ + local_irq_disable_hw(); + __raw_spin_lock(lock); + __set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); +} + +void fastcall __ipipe_spin_unlock_irq(raw_spinlock_t *lock) +{ + __raw_spin_unlock(lock); + __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); + local_irq_enable_hw(); +} + +unsigned long fastcall __ipipe_spin_lock_irqsave(raw_spinlock_t *lock) +{ + unsigned long flags; + int s; + + local_irq_save_hw(flags); + __raw_spin_lock(lock); + s =3D __test_and_set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(statu= s)); + + return raw_mangle_irq_bits(s, flags); +} + +void fastcall __ipipe_spin_unlock_irqrestore(raw_spinlock_t *lock, unsig= ned long x) +{ + __raw_spin_unlock(lock); + if (!raw_demangle_irq_bits(&x)) + __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); + local_irq_restore_hw(x); +} + +void fastcall __ipipe_spin_unlock_irqbegin(ipipe_spinlock_t *lock) +{ + __raw_spin_unlock(&lock->__raw_lock); +} + +void fastcall __ipipe_spin_unlock_irqcomplete(unsigned long x) +{ + if (!raw_demangle_irq_bits(&x)) + __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); + local_irq_restore_hw(x); +} + +/* Must be called hw IRQs off. 
*/ +void fastcall __ipipe_set_irq_pending(struct ipipe_domain *ipd, unsigned= irq) +{ + int level =3D irq >> IPIPE_IRQ_ISHIFT, rank =3D irq & IPIPE_IRQ_IMASK; + + if (likely(!test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))) { + __set_bit(rank, &ipipe_cpudom_var(ipd, irqpend_lomask)[level]); + __set_bit(level,&ipipe_cpudom_var(ipd, irqpend_himask)); + } else + __set_bit(rank, &ipipe_cpudom_var(ipd, irqheld_mask)[level]); + + ipipe_cpudom_var(ipd, irqall)[irq]++; +} + +/* Must be called hw IRQs off. */ +void fastcall __ipipe_lock_irq(struct ipipe_domain *ipd, int cpu, unsign= ed irq) +{ + if (likely(!test_and_set_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control))= ) { + int level =3D irq >> IPIPE_IRQ_ISHIFT, rank =3D irq & IPIPE_IRQ_IMASK;= + if (__test_and_clear_bit(rank, &ipipe_percpudom(ipd, irqpend_lomask, c= pu)[level])) + __set_bit(rank, &ipipe_cpudom_var(ipd, irqheld_mask)[level]); + if (ipipe_percpudom(ipd, irqpend_lomask, cpu)[level] =3D=3D 0) + __clear_bit(level, &ipipe_percpudom(ipd, irqpend_himask, cpu)); + } +} + +/* Must be called hw IRQs off. */ +void fastcall __ipipe_unlock_irq(struct ipipe_domain *ipd, unsigned irq)= +{ + int cpu; + + if (likely(test_and_clear_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)= )) { + int level =3D irq >> IPIPE_IRQ_ISHIFT, rank =3D irq & IPIPE_IRQ_IMASK;= + for_each_online_cpu(cpu) { + if (test_and_clear_bit(rank, &ipipe_percpudom(ipd, irqheld_mask, cpu)= [level])) { + /* We need atomic ops here: */ + set_bit(rank, &ipipe_percpudom(ipd, irqpend_lomask, cpu)[level]); + set_bit(level, &ipipe_percpudom(ipd, irqpend_himask, cpu)); + } + } + } +} + +/* __ipipe_walk_pipeline(): Plays interrupts pending in the log. Must + be called with local hw interrupts disabled. 
*/ + +void fastcall __ipipe_walk_pipeline(struct list_head *pos) +{ + struct ipipe_domain *this_domain =3D ipipe_current_domain, *next_domain= ; + + while (pos !=3D &__ipipe_pipeline) { + + next_domain =3D list_entry(pos, struct ipipe_domain, p_link); + + if (test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(next_domain, status))= ) + break; /* Stalled stage -- do not go further. */ + + if (ipipe_cpudom_var(next_domain, irqpend_himask) !=3D 0) { + + if (next_domain =3D=3D this_domain) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + else { + + ipipe_cpudom_var(this_domain, evsync) =3D 0; + ipipe_current_domain =3D next_domain; + ipipe_suspend_domain(); /* Sync stage and propagate interrupts. */ + + if (ipipe_current_domain =3D=3D next_domain) + ipipe_current_domain =3D this_domain; + /* + * Otherwise, something changed the current domain under our + * feet recycling the register set; do not override the new + * domain. + */ + + if (ipipe_cpudom_var(this_domain, irqpend_himask) !=3D 0 && + !test_bit(IPIPE_STALL_FLAG, + &ipipe_cpudom_var(this_domain, status))) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + } + + break; + } else if (next_domain =3D=3D this_domain) + break; + + pos =3D next_domain->p_link.next; + } +} + +/* + * ipipe_suspend_domain() -- Suspend the current domain, switching to + * the next one which has pending work down the pipeline. 
+ */ +void ipipe_suspend_domain(void) +{ + struct ipipe_domain *this_domain, *next_domain; + struct list_head *ln; + unsigned long flags; + + local_irq_save_hw(flags); + + this_domain =3D next_domain =3D ipipe_current_domain; + + __clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(this_domain, status)); + + if (ipipe_cpudom_var(this_domain, irqpend_himask) !=3D 0) + goto sync_stage; + + for (;;) { + ln =3D next_domain->p_link.next; + + if (ln =3D=3D &__ipipe_pipeline) + break; + + next_domain =3D list_entry(ln, struct ipipe_domain, p_link); + + if (test_bit(IPIPE_STALL_FLAG, + &ipipe_cpudom_var(next_domain, status)) !=3D 0) + break; + + if (ipipe_cpudom_var(next_domain, irqpend_himask) =3D=3D 0) + continue; + + ipipe_current_domain =3D next_domain; + +sync_stage: + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + + if (ipipe_current_domain !=3D next_domain) + /* + * Something has changed the current domain under our + * feet, recycling the register set; take note. + */ + this_domain =3D ipipe_current_domain; + } + + ipipe_current_domain =3D this_domain; + + local_irq_restore_hw(flags); +} + +/* ipipe_alloc_virq() -- Allocate a pipelined virtual/soft interrupt. + * Virtual interrupts are handled in exactly the same way as their + * hw-generated counterparts wrt pipelining. + */ +unsigned ipipe_alloc_virq(void) +{ + unsigned long flags, irq =3D 0; + int ipos; + + spin_lock_irqsave(&__ipipe_pipelock, flags); + + if (__ipipe_virtual_irq_map !=3D ~0) { + ipos =3D ffz(__ipipe_virtual_irq_map); + set_bit(ipos, &__ipipe_virtual_irq_map); + irq =3D ipos + IPIPE_VIRQ_BASE; + } + + spin_unlock_irqrestore(&__ipipe_pipelock, flags); + + return irq; +} + +/* ipipe_virtualize_irq() -- Attach a handler (and optionally a hw + acknowledge routine) to an interrupt for a given domain. 
*/ + +int ipipe_virtualize_irq(struct ipipe_domain *ipd, + unsigned irq, + ipipe_irq_handler_t handler, + void *cookie, + ipipe_irq_ackfn_t acknowledge, + unsigned modemask) +{ + unsigned long flags; + int err; + + if (irq >=3D IPIPE_NR_IRQS) + return -EINVAL; + + if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK) + return -EPERM; + + if (!test_bit(IPIPE_AHEAD_FLAG, &ipd->flags)) + /* Silently unwire interrupts for non-heading domains. */ + modemask &=3D ~IPIPE_WIRED_MASK; + + spin_lock_irqsave(&__ipipe_pipelock, flags); + + if (handler !=3D NULL) { + if (handler =3D=3D IPIPE_SAME_HANDLER) { + handler =3D ipd->irqs[irq].handler; + cookie =3D ipd->irqs[irq].cookie; + + if (handler =3D=3D NULL) { + err =3D -EINVAL; + goto unlock_and_exit; + } + } else if ((modemask & IPIPE_EXCLUSIVE_MASK) !=3D 0 && + ipd->irqs[irq].handler !=3D NULL) { + err =3D -EBUSY; + goto unlock_and_exit; + } + + /* Wired interrupts can only be delivered to domains + * always heading the pipeline, and using dynamic + * propagation. */ + + if ((modemask & IPIPE_WIRED_MASK) !=3D 0) { + if ((modemask & (IPIPE_PASS_MASK | IPIPE_STICKY_MASK)) !=3D 0) { + err =3D -EINVAL; + goto unlock_and_exit; + } + modemask |=3D (IPIPE_HANDLE_MASK); + } + + if ((modemask & IPIPE_STICKY_MASK) !=3D 0) + modemask |=3D IPIPE_HANDLE_MASK; + } else + modemask &=3D + ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK | + IPIPE_EXCLUSIVE_MASK | IPIPE_WIRED_MASK); + + if (acknowledge =3D=3D NULL && !ipipe_virtual_irq_p(irq)) + /* Acknowledge handler unspecified for a hw interrupt: + use the Linux-defined handler instead. 
*/ + acknowledge =3D ipipe_root_domain->irqs[irq].acknowledge; + + ipd->irqs[irq].handler =3D handler; + ipd->irqs[irq].cookie =3D cookie; + ipd->irqs[irq].acknowledge =3D acknowledge; + ipd->irqs[irq].control =3D modemask; + + if (irq < NR_IRQS && handler !=3D NULL && !ipipe_virtual_irq_p(irq)) { + __ipipe_enable_irqdesc(ipd, irq); + + if ((modemask & IPIPE_ENABLE_MASK) !=3D 0) { + if (ipd !=3D ipipe_current_domain) { + /* IRQ enable/disable state is domain-sensitive, so we may + not change it for another domain. What is allowed + however is forcing some domain to handle an interrupt + source, by passing the proper 'ipd' descriptor which + thus may be different from ipipe_current_domain. */ + err =3D -EPERM; + goto unlock_and_exit; + } + __ipipe_enable_irq(irq); + } + } + + err =3D 0; + + unlock_and_exit: + + spin_unlock_irqrestore(&__ipipe_pipelock, flags); + + return err; +} + +/* ipipe_control_irq() -- Change modes of a pipelined interrupt for + * the current domain. */ + +int ipipe_control_irq(unsigned irq, unsigned clrmask, unsigned setmask) +{ + struct ipipe_domain *ipd; + unsigned long flags; + + if (irq >=3D IPIPE_NR_IRQS) + return -EINVAL; + + ipd =3D ipipe_current_domain; + + if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK) + return -EPERM; + + if (ipd->irqs[irq].handler =3D=3D NULL) + setmask &=3D ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK); + + if ((setmask & IPIPE_STICKY_MASK) !=3D 0) + setmask |=3D IPIPE_HANDLE_MASK; + + if ((clrmask & (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK)) !=3D 0) /* If o= ne goes, both go. 
*/ + clrmask |=3D (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK); + + spin_lock_irqsave(&__ipipe_pipelock, flags); + + ipd->irqs[irq].control &=3D ~clrmask; + ipd->irqs[irq].control |=3D setmask; + + if ((setmask & IPIPE_ENABLE_MASK) !=3D 0) + __ipipe_enable_irq(irq); + else if ((clrmask & IPIPE_ENABLE_MASK) !=3D 0) + __ipipe_disable_irq(irq); + + spin_unlock_irqrestore(&__ipipe_pipelock, flags); + + return 0; +} + +/* __ipipe_dispatch_event() -- Low-level event dispatcher. */ + +int fastcall __ipipe_dispatch_event (unsigned event, void *data) +{ + struct ipipe_domain *start_domain, *this_domain, *next_domain; + ipipe_event_handler_t evhand; + struct list_head *pos, *npos; + unsigned long flags; + int propagate =3D 1; + + local_irq_save_hw(flags); + + start_domain =3D this_domain =3D ipipe_current_domain; + + list_for_each_safe(pos, npos, &__ipipe_pipeline) { + /* + * Note: Domain migration may occur while running + * event or interrupt handlers, in which case the + * current register set is going to be recycled for a + * different domain than the initiating one. We do + * care for that, always tracking the current domain + * descriptor upon return from those handlers. + */ + next_domain =3D list_entry(pos, struct ipipe_domain, p_link); + + /* + * Keep a cached copy of the handler's address since + * ipipe_catch_event() may clear it under our feet. + */ + evhand =3D next_domain->evhand[event]; + + if (evhand !=3D NULL) { + ipipe_current_domain =3D next_domain; + ipipe_cpudom_var(next_domain, evsync) |=3D (1LL << event); + local_irq_restore_hw(flags); + propagate =3D !evhand(event, start_domain, data); + local_irq_save_hw(flags); + ipipe_cpudom_var(next_domain, evsync) &=3D ~(1LL << event); + if (ipipe_current_domain !=3D next_domain) + this_domain =3D ipipe_current_domain; + } + + if (next_domain !=3D ipipe_root_domain && /* NEVER sync the root stage= here. 
*/ + ipipe_cpudom_var(next_domain, irqpend_himask) !=3D 0 && + !test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(next_domain, status)= )) { + ipipe_current_domain =3D next_domain; + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + if (ipipe_current_domain !=3D next_domain) + this_domain =3D ipipe_current_domain; + } + + ipipe_current_domain =3D this_domain; + + if (next_domain =3D=3D this_domain || !propagate) + break; + } + + local_irq_restore_hw(flags); + + return !propagate; +} + +/* + * __ipipe_dispatch_wired -- Wired interrupt dispatcher. Wired + * interrupts are immediately and unconditionally delivered to the + * domain heading the pipeline upon receipt, and such domain must have + * been registered as an invariant head for the system (priority =3D=3D + * IPIPE_HEAD_PRIORITY). The motivation for using wired interrupts is + * to get an extra-fast dispatching path for those IRQs, by relying on + * a straightforward logic based on assumptions that must always be + * true for invariant head domains. The following assumptions are + * made when dealing with such interrupts: + * + * 1- Wired interrupts are purely dynamic, i.e. the decision to + * propagate them down the pipeline must be done from the head domain + * ISR. + * 2- Wired interrupts cannot be shared or sticky. + * 3- The root domain cannot be an invariant pipeline head, in + * consequence of what the root domain cannot handle wired + * interrupts. + * 4- Wired interrupts must have a valid acknowledge handler for the + * head domain (if needed), and in any case, must not rely on handlers + * provided by lower priority domains during the acknowledge cycle + * (see __ipipe_handle_irq). + * + * Called with hw interrupts off. 
+ */ + +int fastcall __ipipe_dispatch_wired(struct ipipe_domain *head_domain, un= signed irq) +{ + struct ipipe_domain *old; + + if (test_bit(IPIPE_LOCK_FLAG, &head_domain->irqs[irq].control)) { + /* If we can't process this IRQ right now, we must + * mark it as held, so that it will get played during + * normal log sync when the corresponding interrupt + * source is eventually unlocked. */ + ipipe_cpudom_var(head_domain, irqall)[irq]++; + __set_bit(irq & IPIPE_IRQ_IMASK, &ipipe_cpudom_var(head_domain, irqhel= d_mask)[irq >> IPIPE_IRQ_ISHIFT]); + return 0; + } + + if (test_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status)))= { + __ipipe_set_irq_pending(head_domain, irq); + return 0; + } + + old =3D ipipe_current_domain; + ipipe_current_domain =3D head_domain; /* Switch to the head domain. */ + + ipipe_cpudom_var(head_domain, irqall)[irq]++; + __set_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status)); + head_domain->irqs[irq].handler(irq, head_domain->irqs[irq].cookie); /* = Call the ISR. */ + __ipipe_run_irqtail(); + __clear_bit(IPIPE_STALL_FLAG, &ipipe_cpudom_var(head_domain, status)); + + /* We expect the caller to start a complete pipeline walk upon + * return, so that propagated interrupts will get played. */ + + if (ipipe_current_domain =3D=3D head_domain) + ipipe_current_domain =3D old; /* Back to the preempted domain. */ + + return 1; +} + +/* + * __ipipe_sync_stage() -- Flush the pending IRQs for the current + * domain (and processor). This routine flushes the interrupt log + * (see "Optimistic interrupt protection" from D. Stodolsky et al. for + * more on the deferred interrupt scheme). Every interrupt that + * occurred while the pipeline was stalled gets played. WARNING: + * callers on SMP boxen should always check for CPU migration on + * return of this routine. One can control the kind of interrupts + * which are going to be sync'ed using the syncmask + * parameter. 
IPIPE_IRQMASK_ANY plays them all, IPIPE_IRQMASK_VIRT + * plays virtual interrupts only. + * + * This routine must be called with hw interrupts off. + */ +void fastcall __ipipe_sync_stage(unsigned long syncmask) +{ + unsigned long mask, submask; + struct ipipe_domain *ipd; + int level, rank, cpu; + unsigned irq; + + if (__test_and_set_bit(IPIPE_SYNC_FLAG, &ipipe_this_cpudom_var(status))= ) + return; + + ipd =3D ipipe_current_domain; + cpu =3D ipipe_processor_id(); + + /* + * The policy here is to keep the dispatching code interrupt-free + * by stalling the current stage. If the upper domain handler + * (which we call) wants to re-enable interrupts while in a safe + * portion of the code (e.g. SA_INTERRUPT flag unset for Linux's + * sigaction()), it will have to unstall (then stall again before + * returning to us!) the stage when it sees fit. + */ + while ((mask =3D (ipipe_this_cpudom_var(irqpend_himask) & syncmask)) !=3D= 0) { + level =3D __ipipe_ffnz(mask); + + while ((submask =3D ipipe_this_cpudom_var(irqpend_lomask)[level]) !=3D= 0) { + rank =3D __ipipe_ffnz(submask); + irq =3D (level << IPIPE_IRQ_ISHIFT) + rank; + + if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) { + __clear_bit(rank, &ipipe_this_cpudom_var(irqpend_lomask)[level]); + continue; + } + + __clear_bit(rank, &ipipe_this_cpudom_var(irqpend_lomask)[level]); + + if (ipipe_this_cpudom_var(irqpend_lomask)[level] =3D=3D 0) + __clear_bit(level, &ipipe_this_cpudom_var(irqpend_himask)); + + __set_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); + + if (ipd =3D=3D ipipe_root_domain) + trace_hardirqs_off(); + + __ipipe_run_isr(ipd, irq); +#ifdef CONFIG_SMP + { + int newcpu =3D ipipe_processor_id(); + + if (newcpu !=3D cpu) { /* Handle CPU migration. */ + /* + * We expect any domain to clear the SYNC bit each + * time it switches in a new task, so that preemptions + * and/or CPU migrations (in the SMP case) over the + * ISR do not lock out the log syncer for some + * indefinite amount of time. 
In the Linux case, + * schedule() handles this (see kernel/sched.c). For + * this reason, we don't bother clearing it here for + * the source CPU in the migration handling case, + * since it must have scheduled another task in by + * now. + */ + __set_bit(IPIPE_SYNC_FLAG, &ipipe_this_cpudom_var(status)); + cpu =3D newcpu; + } + } +#endif /* CONFIG_SMP */ + if (ipd =3D=3D ipipe_root_domain && + test_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status))) + trace_hardirqs_on(); + + __clear_bit(IPIPE_STALL_FLAG, &ipipe_this_cpudom_var(status)); + } + } + + __clear_bit(IPIPE_SYNC_FLAG, &ipipe_this_cpudom_var(status)); +} + +/* ipipe_register_domain() -- Link a new domain to the pipeline. */ + +int ipipe_register_domain(struct ipipe_domain *ipd, + struct ipipe_domain_attr *attr) +{ + struct ipipe_domain *_ipd; + struct list_head *pos; + unsigned long flags; + + if (!ipipe_root_domain_p) { + printk(KERN_WARNING + "I-pipe: Only the root domain may register a new domain.\n"); + return -EPERM; + } + + if (attr->priority =3D=3D IPIPE_HEAD_PRIORITY && + test_bit(IPIPE_AHEAD_FLAG,&__ipipe_pipeline_head()->flags)) + return -EAGAIN; /* Cannot override current head. */ + + flags =3D ipipe_critical_enter(NULL); + + pos =3D NULL; + ipd->slot =3D ffz(__ipipe_domain_slot_map); + + if (ipd->slot < CONFIG_IPIPE_DOMAINS) { + set_bit(ipd->slot, &__ipipe_domain_slot_map); + list_for_each(pos, &__ipipe_pipeline) { + _ipd =3D list_entry(pos, struct ipipe_domain, p_link); + if (_ipd->domid =3D=3D attr->domid) + break; + } + } + + ipipe_critical_exit(flags); + + if (pos !=3D &__ipipe_pipeline) { + if (ipd->slot < CONFIG_IPIPE_DOMAINS) + clear_bit(ipd->slot, &__ipipe_domain_slot_map); + return -EBUSY; + } + +#ifndef CONFIG_SMP + /* + * Set up the perdomain pointers for direct access to the + * percpu domain data. This saves a costly multiply each time + * we need to refer to the contents of the percpu domain data + * array. 
+ */ + __raw_get_cpu_var(ipipe_percpu_daddr)[ipd->slot] =3D &__raw_get_cpu_var= (ipipe_percpu_darray)[ipd->slot]; +#endif + + ipd->name =3D attr->name; + ipd->domid =3D attr->domid; + ipd->pdd =3D attr->pdd; + ipd->flags =3D 0; + + if (attr->priority =3D=3D IPIPE_HEAD_PRIORITY) { + ipd->priority =3D INT_MAX; + __set_bit(IPIPE_AHEAD_FLAG,&ipd->flags); + } + else + ipd->priority =3D attr->priority; + + __ipipe_init_stage(ipd); + + INIT_LIST_HEAD(&ipd->p_link); + +#ifdef CONFIG_PROC_FS + __ipipe_add_domain_proc(ipd); +#endif /* CONFIG_PROC_FS */ + + flags =3D ipipe_critical_enter(NULL); + + list_for_each(pos, &__ipipe_pipeline) { + _ipd =3D list_entry(pos, struct ipipe_domain, p_link); + if (ipd->priority > _ipd->priority) + break; + } + + list_add_tail(&ipd->p_link, pos); + + ipipe_critical_exit(flags); + + printk(KERN_INFO "I-pipe: Domain %s registered.\n", ipd->name); + + /* + * Finally, allow the new domain to perform its initialization + * chores. + */ + + if (attr->entry !=3D NULL) { + ipipe_current_domain =3D ipd; + attr->entry(); + ipipe_current_domain =3D ipipe_root_domain; + + local_irq_save_hw(flags); + + if (ipipe_root_cpudom_var(irqpend_himask) !=3D 0 && + !test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status))) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + + local_irq_restore_hw(flags); + } + + return 0; +} + +/* ipipe_unregister_domain() -- Remove a domain from the pipeline. */ + +int ipipe_unregister_domain(struct ipipe_domain *ipd) +{ + unsigned long flags; + + if (!ipipe_root_domain_p) { + printk(KERN_WARNING + "I-pipe: Only the root domain may unregister a domain.\n"); + return -EPERM; + } + + if (ipd =3D=3D ipipe_root_domain) { + printk(KERN_WARNING + "I-pipe: Cannot unregister the root domain.\n"); + return -EPERM; + } +#ifdef CONFIG_SMP + { + unsigned irq; + int cpu; + + /* + * In the SMP case, wait for the logged events to drain on + * other processors before eventually removing the domain + * from the pipeline. 
+ */ + + ipipe_unstall_pipeline_from(ipd); + + flags =3D ipipe_critical_enter(NULL); + + for (irq =3D 0; irq < IPIPE_NR_IRQS; irq++) { + clear_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control); + clear_bit(IPIPE_STICKY_FLAG, &ipd->irqs[irq].control); + set_bit(IPIPE_PASS_FLAG, &ipd->irqs[irq].control); + } + + ipipe_critical_exit(flags); + + for_each_online_cpu(cpu) { + while (ipipe_percpudom(ipd, irqpend_himask, cpu) > 0) + cpu_relax(); + } + } +#endif /* CONFIG_SMP */ + + mutex_lock(&ipd->mutex); + +#ifdef CONFIG_PROC_FS + __ipipe_remove_domain_proc(ipd); +#endif /* CONFIG_PROC_FS */ + + /* + * Simply remove the domain from the pipeline and we are almost done. + */ + + flags =3D ipipe_critical_enter(NULL); + list_del_init(&ipd->p_link); + ipipe_critical_exit(flags); + + __ipipe_cleanup_domain(ipd); + + mutex_unlock(&ipd->mutex); + + printk(KERN_INFO "I-pipe: Domain %s unregistered.\n", ipd->name); + + return 0; +} + +/* + * ipipe_propagate_irq() -- Force a given IRQ propagation on behalf of + * a running interrupt handler to the next domain down the pipeline. + * ipipe_schedule_irq() -- Does almost the same as above, but attempts + * to pend the interrupt for the current domain first. + */ +int fastcall __ipipe_schedule_irq(unsigned irq, struct list_head *head) +{ + struct ipipe_domain *ipd; + struct list_head *ln; + unsigned long flags; + + if (irq >=3D IPIPE_NR_IRQS || + (ipipe_virtual_irq_p(irq) + && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))) + return -EINVAL; + + local_irq_save_hw(flags); + + ln =3D head; + + while (ln !=3D &__ipipe_pipeline) { + + ipd =3D list_entry(ln, struct ipipe_domain, p_link); + + if (test_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control)) { + __ipipe_set_irq_pending(ipd, irq); + local_irq_restore_hw(flags); + return 1; + } + + ln =3D ipd->p_link.next; + } + + local_irq_restore_hw(flags); + + return 0; +} + +/* ipipe_free_virq() -- Release a virtual/soft interrupt. 
*/ + +int ipipe_free_virq(unsigned virq) +{ + if (!ipipe_virtual_irq_p(virq)) + return -EINVAL; + + clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map); + + return 0; +} + +void ipipe_init_attr(struct ipipe_domain_attr *attr) +{ + attr->name =3D "anon"; + attr->domid =3D 1; + attr->entry =3D NULL; + attr->priority =3D IPIPE_ROOT_PRIO; + attr->pdd =3D NULL; +} + +/* + * ipipe_catch_event() -- Interpose or remove an event handler for a + * given domain. + */ +ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd, + unsigned event, + ipipe_event_handler_t handler) +{ + ipipe_event_handler_t old_handler; + unsigned long flags; + int self =3D 0, cpu; + + if (event & IPIPE_EVENT_SELF) { + event &=3D ~IPIPE_EVENT_SELF; + self =3D 1; + } + + if (event >=3D IPIPE_NR_EVENTS) + return NULL; + + flags =3D ipipe_critical_enter(NULL); + + if (!(old_handler =3D xchg(&ipd->evhand[event],handler))) { + if (handler) { + if (self) + ipd->evself |=3D (1LL << event); + else + __ipipe_event_monitors[event]++; + } + } + else if (!handler) { + if (ipd->evself & (1LL << event)) + ipd->evself &=3D ~(1LL << event); + else + __ipipe_event_monitors[event]--; + } else if ((ipd->evself & (1LL << event)) && !self) { + __ipipe_event_monitors[event]++; + ipd->evself &=3D ~(1LL << event); + } else if (!(ipd->evself & (1LL << event)) && self) { + __ipipe_event_monitors[event]--; + ipd->evself |=3D (1LL << event); + } + + ipipe_critical_exit(flags); + + if (!handler && ipipe_root_domain_p) { + /* + * If we cleared a handler on behalf of the root + * domain, we have to wait for any current invocation + * to drain, since our caller might subsequently unmap + * the target domain. To this aim, this code + * synchronizes with __ipipe_dispatch_event(), + * guaranteeing that either the dispatcher sees a null + * handler in which case it discards the invocation + * (which also prevents from entering a livelock), or + * finds a valid handler and calls it. 
Symmetrically, + * ipipe_catch_event() ensures that the called code + * won't be unmapped under our feet until the event + * synchronization flag is cleared for the given event + * on all CPUs. + */ + + for_each_online_cpu(cpu) { + while (ipipe_percpudom(ipd, evsync, cpu) & (1LL << event)) + schedule_timeout_interruptible(HZ / 50); + } + } + + return old_handler; +} + +cpumask_t ipipe_set_irq_affinity (unsigned irq, cpumask_t cpumask) +{ +#ifdef CONFIG_SMP + if (irq >=3D IPIPE_NR_XIRQS) + /* Allow changing affinity of external IRQs only. */ + return CPU_MASK_NONE; + + if (num_online_cpus() > 1) + return __ipipe_set_irq_affinity(irq,cpumask); +#endif /* CONFIG_SMP */ + + return CPU_MASK_NONE; +} + +int fastcall ipipe_send_ipi (unsigned ipi, cpumask_t cpumask) + +{ +#ifdef CONFIG_SMP + return __ipipe_send_ipi(ipi,cpumask); +#else /* !CONFIG_SMP */ + return -EINVAL; +#endif /* CONFIG_SMP */ +} + +int ipipe_alloc_ptdkey (void) +{ + unsigned long flags; + int key =3D -1; + + spin_lock_irqsave(&__ipipe_pipelock,flags); + + if (__ipipe_ptd_key_count < IPIPE_ROOT_NPTDKEYS) { + key =3D ffz(__ipipe_ptd_key_map); + set_bit(key,&__ipipe_ptd_key_map); + __ipipe_ptd_key_count++; + } + + spin_unlock_irqrestore(&__ipipe_pipelock,flags); + + return key; +} + +int ipipe_free_ptdkey (int key) +{ + unsigned long flags; + + if (key < 0 || key >=3D IPIPE_ROOT_NPTDKEYS) + return -EINVAL; + + spin_lock_irqsave(&__ipipe_pipelock,flags); + + if (test_and_clear_bit(key,&__ipipe_ptd_key_map)) + __ipipe_ptd_key_count--; + + spin_unlock_irqrestore(&__ipipe_pipelock,flags); + + return 0; +} + +int fastcall ipipe_set_ptd (int key, void *value) + +{ + if (key < 0 || key >=3D IPIPE_ROOT_NPTDKEYS) + return -EINVAL; + + current->ptd[key] =3D value; + + return 0; +} + +void fastcall *ipipe_get_ptd (int key) + +{ + if (key < 0 || key >=3D IPIPE_ROOT_NPTDKEYS) + return NULL; + + return current->ptd[key]; +} + +#ifdef CONFIG_PROC_FS + +struct proc_dir_entry *ipipe_proc_root; + +static int 
__ipipe_version_info_proc(char *page, + char **start, + off_t off, int count, int *eof, void *data) +{ + int len =3D sprintf(page, "%s\n", IPIPE_VERSION_STRING); + + len -=3D off; + + if (len <=3D off + count) + *eof =3D 1; + + *start =3D page + off; + + if(len > count) + len =3D count; + + if(len < 0) + len =3D 0; + + return len; +} + +static int __ipipe_common_info_show(struct seq_file *p, void *data) +{ + struct ipipe_domain *ipd =3D (struct ipipe_domain *)p->private; + char handling, stickiness, lockbit, exclusive, virtuality; + + unsigned long ctlbits; + unsigned irq; + + seq_printf(p, " +----- Handling ([A]ccepted, [G]rabbed, [W]ired, = [D]iscarded)\n"); + seq_printf(p, " |+---- Sticky\n"); + seq_printf(p, " ||+--- Locked\n"); + seq_printf(p, " |||+-- Exclusive\n"); + seq_printf(p, " ||||+- Virtual\n"); + seq_printf(p, "[IRQ] |||||\n"); + + mutex_lock(&ipd->mutex); + + for (irq =3D 0; irq < IPIPE_NR_IRQS; irq++) { + /* Remember to protect against + * ipipe_virtual_irq/ipipe_control_irq if more fields + * get involved. */ + ctlbits =3D ipd->irqs[irq].control; + + if (irq >=3D IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq)) + /* + * There might be a hole between the last external + * IRQ and the first virtual one; skip it. + */ + continue; + + if (ipipe_virtual_irq_p(irq) + && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map)) + /* Non-allocated virtual IRQ; skip it. */ + continue; + + /* + * Statuses are as follows: + * o "accepted" means handled _and_ passed down the pipeline. + * o "grabbed" means handled, but the interrupt might be + * terminated _or_ passed down the pipeline depending on + * what the domain handler asks for to the I-pipe. + * o "wired" is basically the same as "grabbed", except that + * the interrupt is unconditionally delivered to an invariant + * pipeline head domain. + * o "passed" means unhandled by the domain but passed + * down the pipeline. + * o "discarded" means unhandled and _not_ passed down the + * pipeline. 
The interrupt merely disappears from the + * current domain down to the end of the pipeline. + */ + if (ctlbits & IPIPE_HANDLE_MASK) { + if (ctlbits & IPIPE_PASS_MASK) + handling =3D 'A'; + else if (ctlbits & IPIPE_WIRED_MASK) + handling =3D 'W'; + else + handling =3D 'G'; + } else if (ctlbits & IPIPE_PASS_MASK) + /* Do not output if no major action is taken. */ + continue; + else + handling =3D 'D'; + + if (ctlbits & IPIPE_STICKY_MASK) + stickiness =3D 'S'; + else + stickiness =3D '.'; + + if (ctlbits & IPIPE_LOCK_MASK) + lockbit =3D 'L'; + else + lockbit =3D '.'; + + if (ctlbits & IPIPE_EXCLUSIVE_MASK) + exclusive =3D 'X'; + else + exclusive =3D '.'; + + if (ipipe_virtual_irq_p(irq)) + virtuality =3D 'V'; + else + virtuality =3D '.'; + + seq_printf(p, " %3u: %c%c%c%c%c\n", + irq, handling, stickiness, lockbit, exclusive, virtuality); + } + + seq_printf(p, "[Domain info]\n"); + + seq_printf(p, "id=3D0x%.8x\n", ipd->domid); + + if (test_bit(IPIPE_AHEAD_FLAG,&ipd->flags)) + seq_printf(p, "priority=3Dtopmost\n"); + else + seq_printf(p, "priority=3D%d\n", ipd->priority); + + mutex_unlock(&ipd->mutex); + + return 0; +} + +static int __ipipe_common_info_open(struct inode *inode, struct file *fi= le) +{ + return single_open(file, __ipipe_common_info_show, PROC_I(inode)->pde->= data); +} + +static struct file_operations __ipipe_info_proc_ops =3D { + .owner =3D THIS_MODULE, + .open =3D __ipipe_common_info_open, + .read =3D seq_read, + .llseek =3D seq_lseek, + .release =3D single_release, +}; + +void __ipipe_add_domain_proc(struct ipipe_domain *ipd) +{ + struct proc_dir_entry *e =3D create_proc_entry(ipd->name, 0444, ipipe_p= roc_root); + if (e) { + e->proc_fops =3D &__ipipe_info_proc_ops; + e->data =3D (void*) ipd; + } +} + +void __ipipe_remove_domain_proc(struct ipipe_domain *ipd) +{ + remove_proc_entry(ipd->name,ipipe_proc_root); +} + +void __init ipipe_init_proc(void) +{ + ipipe_proc_root =3D create_proc_entry("ipipe",S_IFDIR, 0); + 
create_proc_read_entry("version",0444,ipipe_proc_root,&__ipipe_version_= info_proc,NULL); + __ipipe_add_domain_proc(ipipe_root_domain); + + __ipipe_init_tracer(); +} + +#endif /* CONFIG_PROC_FS */ + +#ifdef CONFIG_IPIPE_DEBUG_CONTEXT + +DEFINE_PER_CPU(int, ipipe_percpu_context_check) =3D { 1 }; + +void ipipe_check_context(struct ipipe_domain *border_ipd) +{ + /* Note: We don't make the per_cpu access atomic. We assume that code + which temporarily disables the check does this in atomic context + only. */ + if (likely(ipipe_current_domain->priority <=3D border_ipd->priority) ||= + !per_cpu(ipipe_percpu_context_check, ipipe_processor_id())) + return; + + ipipe_context_check_off(); + + ipipe_trace_panic_freeze(); + ipipe_set_printk_sync(ipipe_current_domain); + printk(KERN_ERR "I-pipe: Detected illicit call from domain '%s'\n" + KERN_ERR " into a service reserved for domain '%s' and " + "below.\n", + ipipe_current_domain->name, border_ipd->name); + show_stack(NULL, NULL); + ipipe_trace_panic_dump(); +} + +EXPORT_SYMBOL(ipipe_check_context); +#endif /* CONFIG_IPIPE_DEBUG_CONTEXT */ + +EXPORT_SYMBOL(ipipe_virtualize_irq); +EXPORT_SYMBOL(ipipe_control_irq); +EXPORT_SYMBOL(ipipe_suspend_domain); +EXPORT_SYMBOL(ipipe_alloc_virq); +EXPORT_PER_CPU_SYMBOL(ipipe_percpu_domain); +EXPORT_PER_CPU_SYMBOL(ipipe_percpu_darray); +EXPORT_SYMBOL(ipipe_root); +EXPORT_SYMBOL(ipipe_stall_pipeline_from); +EXPORT_SYMBOL(ipipe_test_and_stall_pipeline_from); +EXPORT_SYMBOL(ipipe_unstall_pipeline_from); +EXPORT_SYMBOL(ipipe_restore_pipeline_from); +EXPORT_SYMBOL(ipipe_test_and_unstall_pipeline_from); +EXPORT_SYMBOL(ipipe_unstall_pipeline_head); +EXPORT_SYMBOL(__ipipe_restore_pipeline_head); +EXPORT_SYMBOL(__ipipe_unstall_root); +EXPORT_SYMBOL(__ipipe_restore_root); +EXPORT_SYMBOL(__ipipe_spin_lock_irq); +EXPORT_SYMBOL(__ipipe_spin_unlock_irq); +EXPORT_SYMBOL(__ipipe_spin_lock_irqsave); +EXPORT_SYMBOL(__ipipe_spin_unlock_irqrestore); +EXPORT_SYMBOL(__ipipe_pipeline); 
+EXPORT_SYMBOL(__ipipe_lock_irq); +EXPORT_SYMBOL(__ipipe_unlock_irq); +EXPORT_SYMBOL(ipipe_register_domain); +EXPORT_SYMBOL(ipipe_unregister_domain); +EXPORT_SYMBOL(ipipe_free_virq); +EXPORT_SYMBOL(ipipe_init_attr); +EXPORT_SYMBOL(ipipe_catch_event); +EXPORT_SYMBOL(ipipe_alloc_ptdkey); +EXPORT_SYMBOL(ipipe_free_ptdkey); +EXPORT_SYMBOL(ipipe_set_ptd); +EXPORT_SYMBOL(ipipe_get_ptd); +EXPORT_SYMBOL(ipipe_set_irq_affinity); +EXPORT_SYMBOL(ipipe_send_ipi); +EXPORT_SYMBOL(__ipipe_schedule_irq); +#ifdef CONFIG_GENERIC_CLOCKEVENTS +EXPORT_SYMBOL(ipipe_request_tickdev); +EXPORT_SYMBOL(ipipe_release_tickdev); +#endif Index: linux-2.6.23/kernel/ipipe/tracer.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- /dev/null +++ linux-2.6.23/kernel/ipipe/tracer.c @@ -0,0 +1,1342 @@ +/* -*- linux-c -*- + * kernel/ipipe/tracer.c + * + * Copyright (C) 2005 Luotao Fu. + * 2005-2007 Jan Kiszka. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,= + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,= USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define IPIPE_TRACE_PATHS 4 /* Do not lower below 3 */ +#define IPIPE_DEFAULT_ACTIVE 0 +#define IPIPE_DEFAULT_MAX 1 +#define IPIPE_DEFAULT_FROZEN 2 + +#define IPIPE_TRACE_POINTS (1 << CONFIG_IPIPE_TRACE_SHIFT) +#define WRAP_POINT_NO(point) ((point) & (IPIPE_TRACE_POINTS-1)) + +#define IPIPE_DEFAULT_PRE_TRACE 10 +#define IPIPE_DEFAULT_POST_TRACE 10 +#define IPIPE_DEFAULT_BACK_TRACE 100 + +#define IPIPE_DELAY_NOTE 1000 /* in nanoseconds */ +#define IPIPE_DELAY_WARN 10000 /* in nanoseconds */ + +#define IPIPE_TFLG_NMI_LOCK 0x0001 +#define IPIPE_TFLG_NMI_HIT 0x0002 +#define IPIPE_TFLG_NMI_FREEZE_REQ 0x0004 + +#define IPIPE_TFLG_HWIRQ_OFF 0x0100 +#define IPIPE_TFLG_FREEZING 0x0200 +#define IPIPE_TFLG_CURRDOM_SHIFT 10 /* bits 10..11: current domain = */ +#define IPIPE_TFLG_CURRDOM_MASK 0x0C00 +#define IPIPE_TFLG_DOMSTATE_SHIFT 12 /* bits 12..15: domain stalled?= */ +#define IPIPE_TFLG_DOMSTATE_BITS 3 + +#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \ + (point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT))) +#define IPIPE_TFLG_CURRENT_DOMAIN(point) \ + ((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT) + + +struct ipipe_trace_point{ + short type; + short flags; + unsigned long eip; + unsigned long parent_eip; + unsigned long v; + unsigned long long timestamp; +}; + +struct ipipe_trace_path{ + volatile int flags; + int dump_lock; /* separated from flags due to cross-cpu access */ + int trace_pos; /* next point to fill */ + int begin, end; /* finalised path begin and end */ + int post_trace; /* non-zero when in post-trace phase */ + unsigned long long length; /* max path length in cycles */ + unsigned long nmi_saved_eip; /* for deferred requests from NMIs */ + unsigned long nmi_saved_parent_eip; + unsigned long nmi_saved_v; + struct ipipe_trace_point point[IPIPE_TRACE_POINTS]; +} ____cacheline_aligned_in_smp; + +enum 
ipipe_trace_type +{ + IPIPE_TRACE_FUNC =3D 0, + IPIPE_TRACE_BEGIN, + IPIPE_TRACE_END, + IPIPE_TRACE_FREEZE, + IPIPE_TRACE_SPECIAL, + IPIPE_TRACE_PID, +}; + +#define IPIPE_TYPE_MASK 0x0007 +#define IPIPE_TYPE_BITS 3 + + +#ifdef CONFIG_IPIPE_TRACE_VMALLOC + +static struct ipipe_trace_path *trace_paths[NR_CPUS]; + +#else /* !CONFIG_IPIPE_TRACE_VMALLOC */ + +static struct ipipe_trace_path trace_paths[NR_CPUS][IPIPE_TRACE_PATHS] =3D= + { [0 ... NR_CPUS-1] =3D + { [0 ... IPIPE_TRACE_PATHS-1] =3D + { .begin =3D -1, .end =3D -1 } + } + }; +#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ + +int ipipe_trace_enable =3D 0; + +static int active_path[NR_CPUS] =3D + { [0 ... NR_CPUS-1] =3D IPIPE_DEFAULT_ACTIVE }; +static int max_path[NR_CPUS] =3D + { [0 ... NR_CPUS-1] =3D IPIPE_DEFAULT_MAX }; +static int frozen_path[NR_CPUS] =3D + { [0 ... NR_CPUS-1] =3D IPIPE_DEFAULT_FROZEN }; +static IPIPE_DEFINE_SPINLOCK(global_path_lock); +static int pre_trace =3D IPIPE_DEFAULT_PRE_TRACE; +static int post_trace =3D IPIPE_DEFAULT_POST_TRACE; +static int back_trace =3D IPIPE_DEFAULT_BACK_TRACE; +static int verbose_trace =3D 1; +static unsigned long trace_overhead; + +static unsigned long trigger_begin; +static unsigned long trigger_end; + +static DEFINE_MUTEX(out_mutex); +static struct ipipe_trace_path *print_path; +#ifdef CONFIG_IPIPE_TRACE_PANIC +static struct ipipe_trace_path *panic_path; +#endif /* CONFIG_IPIPE_TRACE_PANIC */ +static int print_pre_trace; +static int print_post_trace; + + +static long __ipipe_signed_tsc2us(long long tsc); +static void +__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point); +static void __ipipe_print_symname(struct seq_file *m, unsigned long eip)= ; + + +static notrace void +__ipipe_store_domain_states(struct ipipe_trace_point *point) +{ + struct ipipe_domain *ipd; + struct list_head *pos; + int i =3D 0; + + list_for_each_prev(pos, &__ipipe_pipeline) { + ipd =3D list_entry(pos, struct ipipe_domain, p_link); + + if (test_bit(IPIPE_STALL_FLAG, 
&ipipe_cpudom_var(ipd, status))) + point->flags |=3D 1 << (i + IPIPE_TFLG_DOMSTATE_SHIFT); + + if (ipd =3D=3D ipipe_current_domain) + point->flags |=3D i << IPIPE_TFLG_CURRDOM_SHIFT; + + if (++i > IPIPE_TFLG_DOMSTATE_BITS) + break; + } +} + +static notrace int __ipipe_get_free_trace_path(int old, int cpu_id) +{ + int new_active =3D old; + struct ipipe_trace_path *tp; + + do { + if (++new_active =3D=3D IPIPE_TRACE_PATHS) + new_active =3D 0; + tp =3D &trace_paths[cpu_id][new_active]; + } while ((new_active =3D=3D max_path[cpu_id]) || + (new_active =3D=3D frozen_path[cpu_id]) || + tp->dump_lock); + + return new_active; +} + +static notrace void +__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp, + struct ipipe_trace_path *old_tp, int old_pos) +{ + int i; + + new_tp->trace_pos =3D pre_trace+1; + + for (i =3D new_tp->trace_pos; i > 0; i--) + memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)], + &old_tp->point[WRAP_POINT_NO(old_pos-i)], + sizeof(struct ipipe_trace_point)); + + /* mark the end (i.e. the point before point[0]) invalid */ + new_tp->point[IPIPE_TRACE_POINTS-1].eip =3D 0; +} + +static notrace struct ipipe_trace_path * +__ipipe_trace_end(int cpu_id, struct ipipe_trace_path *tp, int pos) +{ + struct ipipe_trace_path *old_tp =3D tp; + long active =3D active_path[cpu_id]; + unsigned long long length; + + /* do we have a new worst case? 
*/ + length =3D tp->point[tp->end].timestamp - + tp->point[tp->begin].timestamp; + if (length > (trace_paths[cpu_id][max_path[cpu_id]]).length) { + /* we need protection here against other cpus trying + to start a proc dump */ + spin_lock(&global_path_lock); + + /* active path holds new worst case */ + tp->length =3D length; + max_path[cpu_id] =3D active; + + /* find next unused trace path */ + active =3D __ipipe_get_free_trace_path(active, cpu_id); + + spin_unlock(&global_path_lock); + + tp =3D &trace_paths[cpu_id][active]; + + /* migrate last entries for pre-tracing */ + __ipipe_migrate_pre_trace(tp, old_tp, pos); + } + + return tp; +} + +static notrace struct ipipe_trace_path * +__ipipe_trace_freeze(int cpu_id, struct ipipe_trace_path *tp, int pos) +{ + struct ipipe_trace_path *old_tp =3D tp; + long active =3D active_path[cpu_id]; + int i; + + /* frozen paths have no core (begin=3Dend) */ + tp->begin =3D tp->end; + + /* we need protection here against other cpus trying + * to set their frozen path or to start a proc dump */ + spin_lock(&global_path_lock); + + frozen_path[cpu_id] =3D active; + + /* find next unused trace path */ + active =3D __ipipe_get_free_trace_path(active, cpu_id); + + /* check if this is the first frozen path */ + for (i =3D 0; i < NR_CPUS; i++) { + if ((i !=3D cpu_id) && + (trace_paths[i][frozen_path[i]].end >=3D 0)) + tp->end =3D -1; + } + + spin_unlock(&global_path_lock); + + tp =3D &trace_paths[cpu_id][active]; + + /* migrate last entries for pre-tracing */ + __ipipe_migrate_pre_trace(tp, old_tp, pos); + + return tp; +} + +void notrace +__ipipe_trace(enum ipipe_trace_type type, unsigned long eip, + unsigned long parent_eip, unsigned long v) +{ + struct ipipe_trace_path *tp, *old_tp; + int pos, next_pos, begin; + struct ipipe_trace_point *point; + unsigned long flags; + int cpu_id; + + local_irq_save_hw_notrace(flags); + + cpu_id =3D ipipe_processor_id(); + restart: + tp =3D old_tp =3D &trace_paths[cpu_id][active_path[cpu_id]]; + + /* 
here starts a race window with NMIs - catched below */ + + /* check for NMI recursion */ + if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) { + tp->flags |=3D IPIPE_TFLG_NMI_HIT; + + /* first freeze request from NMI context? */ + if ((type =3D=3D IPIPE_TRACE_FREEZE) && + !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) { + /* save arguments and mark deferred freezing */ + tp->flags |=3D IPIPE_TFLG_NMI_FREEZE_REQ; + tp->nmi_saved_eip =3D eip; + tp->nmi_saved_parent_eip =3D parent_eip; + tp->nmi_saved_v =3D v; + } + return; /* no need for restoring flags inside IRQ */ + } + + /* clear NMI events and set lock (atomically per cpu) */ + tp->flags =3D (tp->flags & ~(IPIPE_TFLG_NMI_HIT | + IPIPE_TFLG_NMI_FREEZE_REQ)) + | IPIPE_TFLG_NMI_LOCK; + + /* check active_path again - some nasty NMI may have switched + * it meanwhile */ + if (unlikely(tp !=3D &trace_paths[cpu_id][active_path[cpu_id]])) { + /* release lock on wrong path and restart */ + tp->flags &=3D ~IPIPE_TFLG_NMI_LOCK; + + /* there is no chance that the NMI got deferred + * =3D> no need to check for pending freeze requests */ + goto restart; + } + + /* get the point buffer */ + pos =3D tp->trace_pos; + point =3D &tp->point[pos]; + + /* store all trace point data */ + point->type =3D type; + point->flags =3D raw_irqs_disabled_flags(flags) ? 
IPIPE_TFLG_HWIRQ_OFF = : 0; + point->eip =3D eip; + point->parent_eip =3D parent_eip; + point->v =3D v; + ipipe_read_tsc(point->timestamp); + + __ipipe_store_domain_states(point); + + /* forward to next point buffer */ + next_pos =3D WRAP_POINT_NO(pos+1); + tp->trace_pos =3D next_pos; + + /* only mark beginning if we haven't started yet */ + begin =3D tp->begin; + if (unlikely(type =3D=3D IPIPE_TRACE_BEGIN) && (begin < 0)) + tp->begin =3D pos; + + /* end of critical path, start post-trace if not already started */ + if (unlikely(type =3D=3D IPIPE_TRACE_END) && + (begin >=3D 0) && !tp->post_trace) + tp->post_trace =3D post_trace + 1; + + /* freeze only if the slot is free and we are not already freezing */ + if ((unlikely(type =3D=3D IPIPE_TRACE_FREEZE) || + (unlikely(eip >=3D trigger_begin && eip <=3D trigger_end) && + type =3D=3D IPIPE_TRACE_FUNC)) && + (trace_paths[cpu_id][frozen_path[cpu_id]].begin < 0) && + !(tp->flags & IPIPE_TFLG_FREEZING)) { + tp->post_trace =3D post_trace + 1; + tp->flags |=3D IPIPE_TFLG_FREEZING; + } + + /* enforce end of trace in case of overflow */ + if (unlikely(WRAP_POINT_NO(next_pos + 1) =3D=3D begin)) { + tp->end =3D pos; + goto enforce_end; + } + + /* stop tracing this path if we are in post-trace and + * a) that phase is over now or + * b) a new TRACE_BEGIN came in but we are not freezing this path */ + if (unlikely((tp->post_trace > 0) && ((--tp->post_trace =3D=3D 0) || + ((type =3D=3D IPIPE_TRACE_BEGIN) && + !(tp->flags & IPIPE_TFLG_FREEZING))))) { + /* store the path's end (i.e. excluding post-trace) */ + tp->end =3D WRAP_POINT_NO(pos - post_trace + tp->post_trace); + + enforce_end: + if (tp->flags & IPIPE_TFLG_FREEZING) + tp =3D __ipipe_trace_freeze(cpu_id, tp, pos); + else + tp =3D __ipipe_trace_end(cpu_id, tp, pos); + + /* reset the active path, maybe already start a new one */ + tp->begin =3D (type =3D=3D IPIPE_TRACE_BEGIN) ? 
+ WRAP_POINT_NO(tp->trace_pos - 1) : -1; + tp->end =3D -1; + tp->post_trace =3D 0; + tp->flags =3D 0; + + /* update active_path not earlier to avoid races with NMIs */ + active_path[cpu_id] =3D tp - trace_paths[cpu_id]; + } + + /* we still have old_tp and point, + * let's reset NMI lock and check for catches */ + old_tp->flags &=3D ~IPIPE_TFLG_NMI_LOCK; + if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) { + /* well, this late tagging may not immediately be visible for + * other cpus already dumping this path - a minor issue */ + point->flags |=3D IPIPE_TFLG_NMI_HIT; + + /* handle deferred freezing from NMI context */ + if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ) + __ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip, + old_tp->nmi_saved_parent_eip, + old_tp->nmi_saved_v); + } + + local_irq_restore_hw_notrace(flags); +} + +static unsigned long __ipipe_global_path_lock(void) +{ + unsigned long flags; + int cpu_id; + struct ipipe_trace_path *tp; + + spin_lock_irqsave(&global_path_lock, flags); + + cpu_id =3D ipipe_processor_id(); + restart: + tp =3D &trace_paths[cpu_id][active_path[cpu_id]]; + + /* here is small race window with NMIs - catched below */ + + /* clear NMI events and set lock (atomically per cpu) */ + tp->flags =3D (tp->flags & ~(IPIPE_TFLG_NMI_HIT | + IPIPE_TFLG_NMI_FREEZE_REQ)) + | IPIPE_TFLG_NMI_LOCK; + + /* check active_path again - some nasty NMI may have switched + * it meanwhile */ + if (tp !=3D &trace_paths[cpu_id][active_path[cpu_id]]) { + /* release lock on wrong path and restart */ + tp->flags &=3D ~IPIPE_TFLG_NMI_LOCK; + + /* there is no chance that the NMI got deferred + * =3D> no need to check for pending freeze requests */ + goto restart; + } + + return flags; +} + +static void __ipipe_global_path_unlock(unsigned long flags) +{ + int cpu_id; + struct ipipe_trace_path *tp; + + /* release spinlock first - it's not involved in the NMI issue */ + __ipipe_spin_unlock_irqbegin(&global_path_lock); + + cpu_id =3D ipipe_processor_id(); + tp 
=3D &trace_paths[cpu_id][active_path[cpu_id]];
+
+	tp->flags &=3D ~IPIPE_TFLG_NMI_LOCK;
+
+	/* handle deferred freezing from NMI context */
+	if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
+		__ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip,
+			      tp->nmi_saved_parent_eip, tp->nmi_saved_v);
+
+	/* See __ipipe_spin_lock_irqsave() and friends. */
+	__ipipe_spin_unlock_irqcomplete(flags);
+}
+
+void notrace ipipe_trace_begin(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_BEGIN, __BUILTIN_RETURN_ADDRESS0,
+		      __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_begin);
+
+void notrace ipipe_trace_end(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_END, __BUILTIN_RETURN_ADDRESS0,
+		      __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_end);
+
+void notrace ipipe_trace_freeze(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_FREEZE, __BUILTIN_RETURN_ADDRESS0,
+		      __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_freeze);
+
+void notrace ipipe_trace_special(unsigned char id, unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS),
+		      __BUILTIN_RETURN_ADDRESS0,
+		      __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_special);
+
+void notrace ipipe_trace_pid(pid_t pid, short prio)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS),
+		      __BUILTIN_RETURN_ADDRESS0,
+		      __BUILTIN_RETURN_ADDRESS1, pid);
+}
+EXPORT_SYMBOL(ipipe_trace_pid);
+
+/*
+ * Reset the worst-case (max) trace path of each possible CPU to the empty
+ * state (begin =3D end =3D -1, no points, zero length).
+ *
+ * Runs under the global path lock.  Returns 0 on success or -EBUSY as soon
+ * as a path with dump_lock set is met; paths already reset before that one
+ * stay reset.
+ */
+int ipipe_trace_max_reset(void)
+{
+	int cpu_id;
+	unsigned long flags;
+	struct ipipe_trace_path *path;
+	int ret =3D 0;
+
+	flags =3D __ipipe_global_path_lock();
+
+	/* visit possible CPUs only: with CONFIG_IPIPE_TRACE_VMALLOC the
+	 * per-cpu buffers are allocated for exactly that set, so indexing
+	 * trace_paths[] for any other CPU would hit an unallocated slot */
+	for_each_possible_cpu(cpu_id) {
+		path =3D &trace_paths[cpu_id][max_path[cpu_id]];
+
+		if (path->dump_lock) {
+			ret =3D -EBUSY;
+			break;
+		}
+
+		path->begin =3D -1;
+		path->end =3D -1;
+		path->trace_pos =3D 0;
+		path->length
=3D 0; + } + + __ipipe_global_path_unlock(flags); + + return ret; +} +EXPORT_SYMBOL(ipipe_trace_max_reset); + +int ipipe_trace_frozen_reset(void) +{ + int cpu_id; + unsigned long flags; + struct ipipe_trace_path *path; + int ret =3D 0; + + flags =3D __ipipe_global_path_lock(); + + for_each_online_cpu(cpu_id) { + path =3D &trace_paths[cpu_id][frozen_path[cpu_id]]; + + if (path->dump_lock) { + ret =3D -EBUSY; + break; + } + + path->begin =3D -1; + path->end =3D -1; + path->trace_pos =3D 0; + path->length =3D 0; + } + + __ipipe_global_path_unlock(flags); + + return ret; +} +EXPORT_SYMBOL(ipipe_trace_frozen_reset); + +static void +__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point, + int trylock) +{ + struct task_struct *task =3D NULL; + char buf[8]; + int i; + int locked =3D 1; + + if (trylock) { + if (!read_trylock(&tasklist_lock)) + locked =3D 0; + } else + read_lock(&tasklist_lock); + + if (locked) + task =3D find_task_by_pid((pid_t)point->v); + + if (task) + strncpy(task_info, task->comm, 11); + else + strcpy(task_info, "--"); + + if (locked) + read_unlock(&tasklist_lock); + + for (i =3D strlen(task_info); i < 11; i++) + task_info[i] =3D ' '; + + sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS); + strcpy(task_info + (11 - strlen(buf)), buf); +} + +#ifdef CONFIG_IPIPE_TRACE_PANIC +void ipipe_trace_panic_freeze(void) +{ + unsigned long flags; + int cpu_id; + + if (!ipipe_trace_enable) + return; + + ipipe_trace_enable =3D 0; + local_irq_save_hw_notrace(flags); + + cpu_id =3D ipipe_processor_id(); + + panic_path =3D &trace_paths[cpu_id][active_path[cpu_id]]; + + local_irq_restore_hw(flags); +} +EXPORT_SYMBOL(ipipe_trace_panic_freeze); + +void ipipe_trace_panic_dump(void) +{ + int cnt =3D back_trace; + int start, pos; + char task_info[12]; + + if (!panic_path) + return; + + ipipe_context_check_off(); + + printk("I-pipe tracer log (%d points):\n", cnt); + + start =3D pos =3D WRAP_POINT_NO(panic_path->trace_pos-1); + + while (cnt-- > 0) { + struct 
ipipe_trace_point *point =3D &panic_path->point[pos]; + long time; + char buf[16]; + int i; + + printk(" %c", + (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); + + for (i =3D IPIPE_TFLG_DOMSTATE_BITS; i >=3D 0; i--) + printk("%c", + (IPIPE_TFLG_CURRENT_DOMAIN(point) =3D=3D i) ? + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? + '#' : '+') : + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? + '*' : ' ')); + + if (!point->eip) + printk("--\n"); + else { + __ipipe_trace_point_type(buf, point); + printk(buf); + + switch (point->type & IPIPE_TYPE_MASK) { + case IPIPE_TRACE_FUNC: + printk(" "); + break; + + case IPIPE_TRACE_PID: + __ipipe_get_task_info(task_info, + point, 1); + printk(task_info); + break; + + default: + printk("0x%08lx ", point->v); + } + + time =3D __ipipe_signed_tsc2us(point->timestamp - + panic_path->point[start].timestamp); + printk(" %5ld ", time); + + __ipipe_print_symname(NULL, point->eip); + printk(" ("); + __ipipe_print_symname(NULL, point->parent_eip); + printk(")\n"); + } + pos =3D WRAP_POINT_NO(pos - 1); + } + + panic_path =3D NULL; +} +EXPORT_SYMBOL(ipipe_trace_panic_dump); +#endif /* CONFIG_IPIPE_TRACE_PANIC */ + + +/* --- /proc output --- */ + +static notrace int __ipipe_in_critical_trpath(long point_no) +{ + return ((WRAP_POINT_NO(point_no-print_path->begin) < + WRAP_POINT_NO(print_path->end-print_path->begin)) || + ((print_path->end =3D=3D print_path->begin) && + (WRAP_POINT_NO(point_no-print_path->end) > + print_post_trace))); +} + +static long __ipipe_signed_tsc2us(long long tsc) +{ + unsigned long long abs_tsc; + long us; + + /* ipipe_tsc2us works on unsigned =3D> handle sign separately */ + abs_tsc =3D (tsc >=3D 0) ? 
tsc : -tsc; + us =3D ipipe_tsc2us(abs_tsc); + if (tsc < 0) + return -us; + else + return us; +} + +static void +__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point) +{ + switch (point->type & IPIPE_TYPE_MASK) { + case IPIPE_TRACE_FUNC: + strcpy(buf, "func "); + break; + + case IPIPE_TRACE_BEGIN: + strcpy(buf, "begin "); + break; + + case IPIPE_TRACE_END: + strcpy(buf, "end "); + break; + + case IPIPE_TRACE_FREEZE: + strcpy(buf, "freeze "); + break; + + case IPIPE_TRACE_SPECIAL: + sprintf(buf, "(0x%02x) ", + point->type >> IPIPE_TYPE_BITS); + break; + + case IPIPE_TRACE_PID: + sprintf(buf, "[%5d] ", (pid_t)point->v); + break; + } +} + +static void +__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *poi= nt) +{ + char mark =3D ' '; + int point_no =3D point - print_path->point; + int i; + + if (print_path->end =3D=3D point_no) + mark =3D '<'; + else if (print_path->begin =3D=3D point_no) + mark =3D '>'; + else if (__ipipe_in_critical_trpath(point_no)) + mark =3D ':'; + seq_printf(m, "%c%c", mark, + (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); + + if (!verbose_trace) + return; + + for (i =3D IPIPE_TFLG_DOMSTATE_BITS; i >=3D 0; i--) + seq_printf(m, "%c", + (IPIPE_TFLG_CURRENT_DOMAIN(point) =3D=3D i) ? + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? + '#' : '+') : + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' ')); +} + +static void +__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point)= +{ + unsigned long delay =3D 0; + int next; + char *mark =3D " "; + + next =3D WRAP_POINT_NO(point+1 - print_path->point); + + if (next !=3D print_path->trace_pos) + delay =3D ipipe_tsc2ns(print_path->point[next].timestamp - + point->timestamp); + + if (__ipipe_in_critical_trpath(point - print_path->point)) { + if (delay > IPIPE_DELAY_WARN) + mark =3D "! 
"; + else if (delay > IPIPE_DELAY_NOTE) + mark =3D "+ "; + } + seq_puts(m, mark); + + if (verbose_trace) + seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000, + (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' '); + else + seq_puts(m, " "); +} + +static void __ipipe_print_symname(struct seq_file *m, unsigned long eip)= +{ + char namebuf[KSYM_NAME_LEN+1]; + unsigned long size, offset; + const char *sym_name; + char *modname; + + sym_name =3D kallsyms_lookup(eip, &size, &offset, &modname, namebuf); + +#ifdef CONFIG_IPIPE_TRACE_PANIC + if (!m) { + /* panic dump */ + if (sym_name) { + printk("%s+0x%lx", sym_name, offset); + if (modname) + printk(" [%s]", modname); + } + } else +#endif /* CONFIG_IPIPE_TRACE_PANIC */ + { + if (sym_name) { + if (verbose_trace) { + seq_printf(m, "%s+0x%lx", sym_name, offset); + if (modname) + seq_printf(m, " [%s]", modname); + } else + seq_puts(m, sym_name); + } else + seq_printf(m, "<%08lx>", eip); + } +} + +static void __ipipe_print_headline(struct seq_file *m) +{ + seq_printf(m, "Calibrated minimum trace-point overhead: %lu.%03lu " + "us\n\n", trace_overhead/1000, trace_overhead%1000); + + if (verbose_trace) { + const char *name[4] =3D { [0 ... 3] =3D "" }; + struct list_head *pos; + int i =3D 0; + + list_for_each_prev(pos, &__ipipe_pipeline) { + struct ipipe_domain *ipd =3D + list_entry(pos, struct ipipe_domain, p_link); + + name[i] =3D ipd->name; + if (++i > 3) + break; + } + + seq_printf(m, + " +----- Hard IRQs ('|': locked)\n" + " |+---- %s\n" + " ||+--- %s\n" + " |||+-- %s\n" + " ||||+- %s%s\n" + " ||||| +---------- " + "Delay flag ('+': > %d us, '!': > %d us)\n" + " ||||| | +- " + "NMI noise ('N')\n" + " ||||| | |\n" + " Type User Val. Time Delay Function " + "(Parent)\n", + name[3], name[2], name[1], name[0], + name[0] ? 
" ('*': domain stalled, '+': current, " + "'#': current+stalled)" : "", + IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); + } else + seq_printf(m, + " +--------------- Hard IRQs ('|': locked)\n" + " | +- Delay flag " + "('+': > %d us, '!': > %d us)\n" + " | |\n" + " Type Time Function (Parent)\n", + IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); +} + +static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos) +{ + loff_t n =3D *pos; + + mutex_lock(&out_mutex); + + if (!n) { + struct ipipe_trace_path *path; + unsigned long length_usecs; + int points, cpu; + unsigned long flags; + + /* protect against max_path/frozen_path updates while we + * haven't locked our target path, also avoid recursively + * taking global_path_lock from NMI context */ + flags =3D __ipipe_global_path_lock(); + + /* find the longest of all per-cpu paths */ + print_path =3D NULL; + for_each_online_cpu(cpu) { + path =3D &trace_paths[cpu][max_path[cpu]]; + if ((print_path =3D=3D NULL) || + (path->length > print_path->length)) { + print_path =3D path; + break; + } + } + print_path->dump_lock =3D 1; + + __ipipe_global_path_unlock(flags); + + /* does this path actually contain data? 
*/ + if (print_path->end =3D=3D print_path->begin) + return NULL; + + /* number of points inside the critical path */ + points =3D WRAP_POINT_NO(print_path->end-print_path->begin+1); + + /* pre- and post-tracing length, post-trace length was frozen + in __ipipe_trace, pre-trace may have to be reduced due to + buffer overrun */ + print_pre_trace =3D pre_trace; + print_post_trace =3D WRAP_POINT_NO(print_path->trace_pos - + print_path->end - 1); + if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) + print_pre_trace =3D IPIPE_TRACE_POINTS - 1 - points - + print_post_trace; + + length_usecs =3D ipipe_tsc2us(print_path->length); + seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe-%s\n" + "------------------------------------------------------------\n", + UTS_RELEASE, IPIPE_ARCH_STRING); + seq_printf(m, "CPU: %d, Begin: %lld cycles, Trace Points: " + "%d (-%d/+%d), Length: %lu us\n", + cpu, print_path->point[print_path->begin].timestamp, + points, print_pre_trace, print_post_trace, length_usecs); + __ipipe_print_headline(m); + } + + /* check if we are inside the trace range */ + if (n >=3D WRAP_POINT_NO(print_path->end - print_path->begin + 1 + + print_pre_trace + print_post_trace)) + return NULL; + + /* return the next point to be shown */ + return &print_path->point[WRAP_POINT_NO(print_path->begin - + print_pre_trace + n)]; +} + +static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *p= os) +{ + loff_t n =3D ++*pos; + + /* check if we are inside the trace range with the next entry */ + if (n >=3D WRAP_POINT_NO(print_path->end - print_path->begin + 1 + + print_pre_trace + print_post_trace)) + return NULL; + + /* return the next point to be shown */ + return &print_path->point[WRAP_POINT_NO(print_path->begin - + print_pre_trace + *pos)]; +} + +static void __ipipe_prtrace_stop(struct seq_file *m, void *p) +{ + if (print_path) + print_path->dump_lock =3D 0; + mutex_unlock(&out_mutex); +} + +static int __ipipe_prtrace_show(struct 
seq_file *m, void *p) +{ + long time; + struct ipipe_trace_point *point =3D p; + char buf[16]; + + if (!point->eip) { + seq_puts(m, "--\n"); + return 0; + } + + __ipipe_print_pathmark(m, point); + __ipipe_trace_point_type(buf, point); + seq_puts(m, buf); + if (verbose_trace) + switch (point->type & IPIPE_TYPE_MASK) { + case IPIPE_TRACE_FUNC: + seq_puts(m, " "); + break; + + case IPIPE_TRACE_PID: + __ipipe_get_task_info(buf, point, 0); + seq_puts(m, buf); + break; + + default: + seq_printf(m, "0x%08lx ", point->v); + } + + time =3D __ipipe_signed_tsc2us(point->timestamp - + print_path->point[print_path->begin].timestamp); + seq_printf(m, "%5ld", time); + + __ipipe_print_delay(m, point); + __ipipe_print_symname(m, point->eip); + seq_puts(m, " ("); + __ipipe_print_symname(m, point->parent_eip); + seq_puts(m, ")\n"); + + return 0; +} + +static struct seq_operations __ipipe_max_ptrace_ops =3D { + .start =3D __ipipe_max_prtrace_start, + .next =3D __ipipe_prtrace_next, + .stop =3D __ipipe_prtrace_stop, + .show =3D __ipipe_prtrace_show +}; + +static int __ipipe_max_prtrace_open(struct inode *inode, struct file *fi= le) +{ + return seq_open(file, &__ipipe_max_ptrace_ops); +} + +static ssize_t +__ipipe_max_reset(struct file *file, const char __user *pbuffer, + size_t count, loff_t *data) +{ + mutex_lock(&out_mutex); + ipipe_trace_max_reset(); + mutex_unlock(&out_mutex); + + return count; +} + +struct file_operations __ipipe_max_prtrace_fops =3D { + .open =3D __ipipe_max_prtrace_open, + .read =3D seq_read, + .write =3D __ipipe_max_reset, + .llseek =3D seq_lseek, + .release =3D seq_release, +}; + +static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *po= s) +{ + loff_t n =3D *pos; + + mutex_lock(&out_mutex); + + if (!n) { + struct ipipe_trace_path *path; + int cpu; + unsigned long flags; + + /* protect against max_path/frozen_path updates while we + * haven't locked our target path, also avoid recursively + * taking global_path_lock from NMI context */ + flags 
=3D __ipipe_global_path_lock();
+
+		/* find the first of all per-cpu frozen paths */
+		print_path =3D NULL;
+		for_each_online_cpu(cpu) {
+			path =3D &trace_paths[cpu][frozen_path[cpu]];
+			if (path->end >=3D 0) {
+				print_path =3D path;
+				break;
+			}
+		}
+		if (print_path)
+			print_path->dump_lock =3D 1;
+
+		__ipipe_global_path_unlock(flags);
+
+		if (!print_path)
+			return NULL;
+
+		/* back- and post-tracing length, post-trace length was frozen
+		   in __ipipe_trace, back-trace may have to be reduced due to
+		   buffer overrun */
+		print_pre_trace =3D back_trace-1; /* subtract freeze point */
+		print_post_trace =3D WRAP_POINT_NO(print_path->trace_pos -
+						 print_path->end - 1);
+		/* clamp on the back-trace length computed just above (not on
+		 * the pre_trace setting), so that freeze point + back-trace +
+		 * post-trace never exceed the usable buffer size */
+		if (1+print_pre_trace+print_post_trace >
+		    IPIPE_TRACE_POINTS - 1)
+			print_pre_trace =3D IPIPE_TRACE_POINTS - 2 -
+				print_post_trace;
+
+		seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe-%s\n"
+			      "------------------------------------------------------"
+			      "------\n",
+			   UTS_RELEASE, IPIPE_ARCH_STRING);
+		seq_printf(m, "CPU: %d, Freeze: %lld cycles, Trace Points: %d (+%d)\n"= ,
+			   cpu, print_path->point[print_path->begin].timestamp,
+			   print_pre_trace+1, print_post_trace);
+		__ipipe_print_headline(m);
+	}
+
+	/* check if we are inside the trace range */
+	if (n >=3D print_pre_trace + 1 + print_post_trace)
+		return NULL;
+
+	/* return the next point to be shown */
+	return &print_path->point[WRAP_POINT_NO(print_path->begin-
+						print_pre_trace+n)];
+}
+
+static struct seq_operations __ipipe_frozen_ptrace_ops =3D {
+	.start =3D __ipipe_frozen_prtrace_start,
+	.next =3D __ipipe_prtrace_next,
+	.stop =3D __ipipe_prtrace_stop,
+	.show =3D __ipipe_prtrace_show
+};
+
+static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file = *file)
+{
+	return seq_open(file, &__ipipe_frozen_ptrace_ops);
+}
+
+static ssize_t
+__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer,
+		    size_t count, loff_t *data)
+{
+	char *end, buf[16];
+	int val;
+	int n;
+
+	n =3D (count > sizeof(buf) - 1) ?
sizeof(buf) - 1 : count; + + if (copy_from_user(buf, pbuffer, n)) + return -EFAULT; + + buf[n] =3D '\0'; + val =3D simple_strtol(buf, &end, 0); + + if (((*end !=3D '\0') && !isspace(*end)) || (val < 0)) + return -EINVAL; + + mutex_lock(&out_mutex); + ipipe_trace_frozen_reset(); + if (val > 0) + ipipe_trace_freeze(-1); + mutex_unlock(&out_mutex); + + return count; +} + +struct file_operations __ipipe_frozen_prtrace_fops =3D { + .open =3D __ipipe_frozen_prtrace_open, + .read =3D seq_read, + .write =3D __ipipe_frozen_ctrl, + .llseek =3D seq_lseek, + .release =3D seq_release, +}; + +static int __ipipe_rd_proc_val(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + + len =3D sprintf(page, "%u\n", *(int *)data); + len -=3D off; + if (len <=3D off + count) + *eof =3D 1; + *start =3D page + off; + if (len > count) + len =3D count; + if (len < 0) + len =3D 0; + + return len; +} + +static int __ipipe_wr_proc_val(struct file *file, const char __user *buf= fer, + unsigned long count, void *data) +{ + char *end, buf[16]; + int val; + int n; + + n =3D (count > sizeof(buf) - 1) ? 
sizeof(buf) - 1 : count;
+
+	if (copy_from_user(buf, buffer, n))
+		return -EFAULT;
+
+	buf[n] =3D '\0';
+	val =3D simple_strtol(buf, &end, 0);
+
+	if (((*end !=3D '\0') && !isspace(*end)) || (val < 0))
+		return -EINVAL;
+
+	mutex_lock(&out_mutex);
+	*(int *)data =3D val;
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+static int __ipipe_rd_trigger(char *page, char **start, off_t off, int c= ount,
+			      int *eof, void *data)
+{
+	int len;
+
+	if (!trigger_begin)
+		return 0;
+
+	len =3D sprint_symbol(page, trigger_begin);
+	page[len++] =3D '\n';
+
+	len -=3D off;
+	if (len <=3D off + count)
+		*eof =3D 1;
+	*start =3D page + off;
+	if (len > count)
+		len =3D count;
+	if (len < 0)
+		len =3D 0;
+
+	return len;
+}
+
+/*
+ * write_proc handler of the "trigger" entry: take the written text as a
+ * kernel symbol name and install that symbol's address range as the
+ * trigger range checked by __ipipe_trace().
+ *
+ * At most sizeof(buf) - 1 bytes of input are used and one trailing newline
+ * is stripped.  Returns the number of bytes consumed, -EFAULT if the user
+ * buffer cannot be read, or -ENOENT if the name does not resolve to a
+ * symbol with a known size.
+ */
+static int __ipipe_wr_trigger(struct file *file, const char __user *buff= er,
+			      unsigned long count, void *data)
+{
+	char buf[KSYM_SYMBOL_LEN];
+	unsigned long begin, end;
+
+	if (count > sizeof(buf) - 1)
+		count =3D sizeof(buf) - 1;
+	/* copy_from_user() returns the number of bytes left uncopied,
+	 * never a negative value: any non-zero result is a fault */
+	if (copy_from_user(buf, buffer, count))
+		return -EFAULT;
+	buf[count] =3D 0;
+	/* a zero-length write has no last character to inspect */
+	if (count > 0 && buf[count-1] =3D=3D '\n')
+		buf[count-1] =3D 0;
+
+	begin =3D kallsyms_lookup_name(buf);
+	if (!begin || !kallsyms_lookup_size_offset(begin, &end, NULL))
+		return -ENOENT;
+	/* 'end' holds the symbol size here; turn it into the last address */
+	end +=3D begin - 1;
+
+	mutex_lock(&out_mutex);
+	/* invalidate the current range before setting a new one */
+	trigger_end =3D 0;
+	wmb();
+	/* set new range */
+	trigger_begin =3D begin;
+	wmb();
+	trigger_end =3D end;
+	mutex_unlock(&out_mutex);
+
+	return count;
+}
+
+extern struct proc_dir_entry *ipipe_proc_root;
+
+static void __init
+__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir,
+			      const char *name, int *value_ptr)
+{
+	struct proc_dir_entry *entry;
+
+	entry =3D create_proc_entry(name, 0644, trace_dir);
+	if (entry) {
+		entry->data =3D value_ptr;
+		entry->read_proc =3D __ipipe_rd_proc_val;
+		entry->write_proc =3D __ipipe_wr_proc_val;
+		entry->owner =3D THIS_MODULE;
+	}
+}
+
+void __init __ipipe_init_tracer(void)
+{
+	struct proc_dir_entry
*trace_dir; + struct proc_dir_entry *entry; + unsigned long long start, end, min =3D ULLONG_MAX; + int i; +#ifdef CONFIG_IPIPE_TRACE_VMALLOC + int cpu, path; + + for_each_possible_cpu(cpu) { + trace_paths[cpu] =3D vmalloc( + sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS); + if (trace_paths[cpu] =3D=3D NULL) { + printk(KERN_ERR "I-pipe: " + "insufficient memory for trace buffer.\n"); + return; + } + memset(trace_paths[cpu], 0, + sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS); + for (path =3D 0; path < IPIPE_TRACE_PATHS; path++) { + trace_paths[cpu][path].begin =3D -1; + trace_paths[cpu][path].end =3D -1; + } + } +#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ + ipipe_trace_enable =3D CONFIG_IPIPE_TRACE_ENABLE_VALUE; + + /* Calculate minimum overhead of __ipipe_trace() */ + local_irq_disable_hw(); + for (i =3D 0; i < 100; i++) { + ipipe_read_tsc(start); + __ipipe_trace(IPIPE_TRACE_FUNC, __BUILTIN_RETURN_ADDRESS0, + __BUILTIN_RETURN_ADDRESS1, 0); + ipipe_read_tsc(end); + + end -=3D start; + if (end < min) + min =3D end; + } + local_irq_enable_hw(); + trace_overhead =3D ipipe_tsc2ns(min); + + trace_dir =3D create_proc_entry("trace", S_IFDIR, ipipe_proc_root); + + entry =3D create_proc_entry("max", 0644, trace_dir); + if (entry) + entry->proc_fops =3D &__ipipe_max_prtrace_fops; + + entry =3D create_proc_entry("frozen", 0644, trace_dir); + if (entry) + entry->proc_fops =3D &__ipipe_frozen_prtrace_fops; + + entry =3D create_proc_entry("trigger", 0644, trace_dir); + if (entry) { + entry->read_proc =3D __ipipe_rd_trigger; + entry->write_proc =3D __ipipe_wr_trigger; + entry->owner =3D THIS_MODULE; + } + + __ipipe_create_trace_proc_val(trace_dir, "pre_trace_points", + &pre_trace); + __ipipe_create_trace_proc_val(trace_dir, "post_trace_points", + &post_trace); + __ipipe_create_trace_proc_val(trace_dir, "back_trace_points", + &back_trace); + __ipipe_create_trace_proc_val(trace_dir, "verbose", + &verbose_trace); + __ipipe_create_trace_proc_val(trace_dir, "enable", + 
&ipipe_trace_enable); +} Index: linux-2.6.23/kernel/irq/chip.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/kernel/irq/chip.c +++ linux-2.6.23/kernel/irq/chip.c @@ -340,7 +340,9 @@ handle_level_irq(unsigned int irq, struc irqreturn_t action_ret; =20 spin_lock(&desc->lock); +#ifndef CONFIG_IPIPE mask_ack_irq(desc, irq); +#endif /* CONFIG_IPIPE */ =20 if (unlikely(desc->status & IRQ_INPROGRESS)) goto out_unlock; @@ -417,8 +419,13 @@ handle_fasteoi_irq(unsigned int irq, str =20 spin_lock(&desc->lock); desc->status &=3D ~IRQ_INPROGRESS; +#ifdef CONFIG_IPIPE + desc->chip->unmask(irq); +out: +#else out: desc->chip->eoi(irq); +#endif =20 spin_unlock(&desc->lock); } @@ -462,8 +469,10 @@ handle_edge_irq(unsigned int irq, struct =20 kstat_cpu(cpu).irqs[irq]++; =20 +#ifndef CONFIG_IPIPE /* Start handling the irq */ desc->chip->ack(irq); +#endif /* CONFIG_IPIPE */ =20 /* Mark the IRQ currently in progress.*/ desc->status |=3D IRQ_INPROGRESS; @@ -503,6 +512,69 @@ out_unlock: spin_unlock(&desc->lock); } =20 +#ifdef CONFIG_IPIPE + +void fastcall __ipipe_ack_simple_irq(unsigned irq, struct irq_desc *desc= ) +{ +} + +void fastcall __ipipe_end_simple_irq(unsigned irq, struct irq_desc *desc= ) +{ +} + +void fastcall __ipipe_ack_level_irq(unsigned irq, struct irq_desc *desc)= +{ + mask_ack_irq(desc, irq); +} + +void fastcall __ipipe_end_level_irq(unsigned irq, struct irq_desc *desc)= +{ + if (desc->chip->unmask) + desc->chip->unmask(irq); +} + +void fastcall __ipipe_ack_fasteoi_irq(unsigned irq, struct irq_desc *des= c) +{ + desc->chip->eoi(irq); +} + +void fastcall __ipipe_end_fasteoi_irq(unsigned irq, struct irq_desc *des= c) +{ + desc->chip->unmask(irq); +} + +void fastcall __ipipe_ack_edge_irq(unsigned irq, struct irq_desc *desc) +{ + desc->chip->ack(irq); +} + +void fastcall 
__ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc) +{ +} + +void fastcall __ipipe_ack_bad_irq(unsigned irq, struct irq_desc *desc) +{ + static int done; + + handle_bad_irq(irq, desc); + + if (!done) { + printk(KERN_WARNING "%s: unknown flow handler for IRQ %d\n", + __FUNCTION__, irq); + done =3D 1; + } +} + +void fastcall __ipipe_noack_irq(unsigned irq, struct irq_desc *desc) +{ +} + +void fastcall __ipipe_noend_irq(unsigned irq, struct irq_desc *desc) +{ +} + +#endif /* CONFIG_IPIPE */ + #ifdef CONFIG_SMP /** * handle_percpu_IRQ - Per CPU local irq handler @@ -518,8 +590,10 @@ handle_percpu_irq(unsigned int irq, stru =20 kstat_this_cpu.irqs[irq]++; =20 +#ifndef CONFIG_IPIPE if (desc->chip->ack) desc->chip->ack(irq); +#endif /* CONFIG_IPIPE */ =20 action_ret =3D handle_IRQ_event(irq, desc->action); if (!noirqdebug) @@ -529,6 +603,22 @@ handle_percpu_irq(unsigned int irq, stru desc->chip->eoi(irq); } =20 +#ifdef CONFIG_IPIPE + +void fastcall __ipipe_ack_percpu_irq(unsigned irq, struct irq_desc *desc= ) +{ + if (desc->chip->ack) + desc->chip->ack(irq); +} + +void fastcall __ipipe_end_percpu_irq(unsigned irq, struct irq_desc *desc= ) +{ + if (desc->chip->eoi) + desc->chip->eoi(irq); +} + +#endif /* CONFIG_IPIPE */ + #endif /* CONFIG_SMP */ =20 void @@ -548,6 +638,30 @@ __set_irq_handler(unsigned int irq, irq_ =20 if (!handle) handle =3D handle_bad_irq; +#ifdef CONFIG_IPIPE + else if (handle =3D=3D &handle_simple_irq) { + desc->ipipe_ack =3D &__ipipe_ack_simple_irq; + desc->ipipe_end =3D &__ipipe_end_simple_irq; + } + else if (handle =3D=3D &handle_level_irq) { + desc->ipipe_ack =3D &__ipipe_ack_level_irq; + desc->ipipe_end =3D &__ipipe_end_level_irq; + } + else if (handle =3D=3D &handle_edge_irq) { + desc->ipipe_ack =3D &__ipipe_ack_edge_irq; + desc->ipipe_end =3D &__ipipe_end_edge_irq; + } + else if (handle =3D=3D &handle_fasteoi_irq) { + desc->ipipe_ack =3D &__ipipe_ack_fasteoi_irq; + desc->ipipe_end =3D &__ipipe_end_fasteoi_irq; + } +#ifdef CONFIG_SMP + else 
if (handle =3D=3D &handle_percpu_irq) { + desc->ipipe_ack =3D &__ipipe_ack_percpu_irq; + desc->ipipe_end =3D &__ipipe_end_percpu_irq; + } +#endif /* CONFIG_SMP */ +#endif /* CONFIG_IPIPE */ else if (desc->chip =3D=3D &no_irq_chip) { printk(KERN_WARNING "Trying to install %sinterrupt handler " "for IRQ%d\n", is_chained ? "chained " : "", irq); @@ -559,7 +673,17 @@ __set_irq_handler(unsigned int irq, irq_ * dummy_irq_chip for easy transition. */ desc->chip =3D &dummy_irq_chip; +#ifdef CONFIG_IPIPE + desc->ipipe_ack =3D &__ipipe_noack_irq; + desc->ipipe_end =3D &__ipipe_noend_irq; +#endif /* CONFIG_IPIPE */ } +#ifdef CONFIG_IPIPE + else { + desc->ipipe_ack =3D &__ipipe_ack_bad_irq; + desc->ipipe_end =3D &__ipipe_noend_irq; + } +#endif /* CONFIG_IPIPE */ =20 spin_lock_irqsave(&desc->lock, flags); =20 @@ -569,6 +693,10 @@ __set_irq_handler(unsigned int irq, irq_ mask_ack_irq(desc, irq); desc->status |=3D IRQ_DISABLED; desc->depth =3D 1; +#ifdef CONFIG_IPIPE + desc->ipipe_ack =3D &__ipipe_ack_bad_irq; + desc->ipipe_end =3D &__ipipe_noend_irq; +#endif /* CONFIG_IPIPE */ } desc->handle_irq =3D handle; desc->name =3D name; Index: linux-2.6.23/kernel/power/swsusp.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/kernel/power/swsusp.c +++ linux-2.6.23/kernel/power/swsusp.c @@ -278,6 +278,7 @@ int swsusp_suspend(void) return error; =20 local_irq_disable(); + local_irq_disable_hw_cond(); /* At this point, device_suspend() has been called, but *not* * device_power_down(). We *must* device_power_down() now. * Otherwise, drivers for some devices (e.g. 
interrupt controllers) @@ -299,6 +300,7 @@ int swsusp_suspend(void) */ device_power_up(); Enable_irqs: + local_irq_enable_hw_cond(); local_irq_enable(); return error; } @@ -308,6 +310,7 @@ int swsusp_resume(void) int error; =20 local_irq_disable(); + local_irq_disable_hw_cond(); /* NOTE: device_power_down() is just a suspend() with irqs off; * it has no special "power things down" semantics */ @@ -334,6 +337,7 @@ int swsusp_resume(void) restore_processor_state(); touch_softlockup_watchdog(); device_power_up(); + local_irq_enable_hw_cond(); local_irq_enable(); return error; } Index: linux-2.6.23/kernel/printk.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/kernel/printk.c +++ linux-2.6.23/kernel/printk.c @@ -504,6 +504,82 @@ static int have_callable_console(void) * printf(3) */ =20 +#ifdef CONFIG_IPIPE + +static ipipe_spinlock_t __ipipe_printk_lock =3D IPIPE_SPIN_LOCK_UNLOCKED= ; + +static int __ipipe_printk_fill; + +static char __ipipe_printk_buf[__LOG_BUF_LEN]; + +void __ipipe_flush_printk (unsigned virq, void *cookie) +{ + char *p =3D __ipipe_printk_buf; + int len, lmax, out =3D 0; + unsigned long flags; + + goto start; + + do { + spin_unlock_irqrestore(&__ipipe_printk_lock, flags); + start: + lmax =3D __ipipe_printk_fill; + while (out < lmax) { + len =3D strlen(p) + 1; + printk("%s",p); + p +=3D len; + out +=3D len; + } + spin_lock_irqsave(&__ipipe_printk_lock, flags); + } + while (__ipipe_printk_fill !=3D lmax); + + __ipipe_printk_fill =3D 0; + + spin_unlock_irqrestore(&__ipipe_printk_lock, flags); +} + +asmlinkage int printk(const char *fmt, ...) 
+{ + int r, fbytes, oldcount, cs =3D -1; + unsigned long flags; + va_list args; + + va_start(args, fmt); + + if (test_bit(IPIPE_SPRINTK_FLAG,&ipipe_current_domain->flags) || + oops_in_progress) + cs =3D ipipe_disable_context_check(ipipe_processor_id()); + + if (ipipe_current_domain =3D=3D ipipe_root_domain || cs !=3D -1) { + r =3D vprintk(fmt, args); + if (cs !=3D -1) + ipipe_restore_context_check(ipipe_processor_id(), cs); + goto out; + } + + spin_lock_irqsave(&__ipipe_printk_lock, flags); + + oldcount =3D __ipipe_printk_fill; + fbytes =3D __LOG_BUF_LEN - oldcount; + + if (fbytes > 1) { + r =3D vscnprintf(__ipipe_printk_buf + __ipipe_printk_fill, + fbytes, fmt, args) + 1; /* account for the null byte */ + __ipipe_printk_fill +=3D r; + } else + r =3D 0; + + spin_unlock_irqrestore(&__ipipe_printk_lock, flags); + + if (oldcount =3D=3D 0) + ipipe_trigger_irq(__ipipe_printk_virq); +out: + va_end(args); + + return r; +} +#else /* !CONFIG_IPIPE */ asmlinkage int printk(const char *fmt, ...) { va_list args; @@ -515,6 +591,7 @@ asmlinkage int printk(const char *fmt, . 
=20 return r; } +#endif /* CONFIG_IPIPE */ =20 /* cpu currently holding logbuf_lock */ static volatile unsigned int printk_cpu =3D UINT_MAX; Index: linux-2.6.23/kernel/sched.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/kernel/sched.c +++ linux-2.6.23/kernel/sched.c @@ -1436,7 +1436,7 @@ static int try_to_wake_up(struct task_st =20 rq =3D task_rq_lock(p, &flags); old_state =3D p->state; - if (!(old_state & state)) + if (!(old_state & state) || (old_state & TASK_NOWAKEUP)) goto out; =20 if (p->se.on_rq) @@ -1850,13 +1850,15 @@ asmlinkage void schedule_tail(struct tas #endif if (current->set_child_tid) put_user(current->pid, current->set_child_tid); + + ipipe_init_notify(current); } =20 /* * context_switch - switch to the new MM and the new * thread's register state. */ -static inline void +static inline int context_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { @@ -1897,12 +1899,17 @@ context_switch(struct rq *rq, struct tas switch_to(prev, next, prev); =20 barrier(); + + if (task_hijacked(prev)) + return 1; /* * this_rq must be evaluated again because prev may have moved * CPUs since it called schedule(), thus the 'rq' on its stack * frame will be invalid. */ finish_task_switch(this_rq(), prev); + + return 0; } =20 /* @@ -3474,6 +3481,8 @@ asmlinkage void __sched schedule(void) struct rq *rq; int cpu; =20 + ipipe_check_context(ipipe_root_domain); + need_resched: preempt_disable(); cpu =3D smp_processor_id(); @@ -3481,6 +3490,11 @@ need_resched: rcu_qsctr_inc(cpu); prev =3D rq->curr; switch_count =3D &prev->nivcsw; + if (unlikely(prev->state & TASK_ATOMICSWITCH)) { + prev->state &=3D ~TASK_ATOMICSWITCH; + /* Pop one disable level -- one still remains. 
*/ + preempt_enable(); + } =20 release_kernel_lock(prev); need_resched_nonpreemptible: @@ -3514,7 +3528,8 @@ need_resched_nonpreemptible: rq->curr =3D next; ++*switch_count; =20 - context_switch(rq, prev, next); /* unlocks the rq */ + if (context_switch(rq, prev, next)) /* unlocks the rq unless hijacked = */ + return; } else spin_unlock_irq(&rq->lock); =20 @@ -3542,6 +3557,7 @@ asmlinkage void __sched preempt_schedule struct task_struct *task =3D current; int saved_lock_depth; #endif + ipipe_check_context(ipipe_root_domain); /* * If there is a non-zero preempt_count or interrupts are disabled, * we do not want to preempt the current task. Just return.. @@ -4265,6 +4281,7 @@ recheck: deactivate_task(rq, p, 0); oldprio =3D p->prio; __setscheduler(rq, p, policy, param->sched_priority); + ipipe_setsched_notify(p); if (on_rq) { activate_task(rq, p, 0); /* @@ -6739,3 +6756,54 @@ void set_curr_task(int cpu, struct task_ } =20 #endif + +#ifdef CONFIG_IPIPE + +int ipipe_setscheduler_root (struct task_struct *p, int policy, int prio= ) +{ + unsigned long flags; + int oldprio, on_rq; + struct rq *rq; + + spin_lock_irqsave(&p->pi_lock, flags); + rq =3D __task_rq_lock(p); + on_rq =3D p->se.on_rq; + if (on_rq) + deactivate_task(rq, p, 0); + oldprio =3D p->prio; + __setscheduler(rq, p, policy, prio); + ipipe_setsched_notify(p); + if (on_rq) { + activate_task(rq, p, 0); + if (task_running(rq, p)) { + if (p->prio > oldprio) + resched_task(rq->curr); + } else + check_preempt_curr(rq, p); + } + __task_rq_unlock(rq); + spin_unlock_irqrestore(&p->pi_lock, flags); + + rt_mutex_adjust_pi(p); + + return 0; +} + +EXPORT_SYMBOL(ipipe_setscheduler_root); + +int ipipe_reenter_root (struct task_struct *prev, int policy, int prio) +{ + finish_task_switch(this_rq(), prev); + + (void)reacquire_kernel_lock(current); + preempt_enable_no_resched(); + + if (current->policy !=3D policy || current->rt_priority !=3D prio) + return ipipe_setscheduler_root(current, policy, prio); + + return 0; +} + 
+EXPORT_SYMBOL(ipipe_reenter_root); + +#endif /* CONFIG_IPIPE */ Index: linux-2.6.23/kernel/signal.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/kernel/signal.c +++ linux-2.6.23/kernel/signal.c @@ -455,6 +455,7 @@ void signal_wake_up(struct task_struct * unsigned int mask; =20 set_tsk_thread_flag(t, TIF_SIGPENDING); + ipipe_sigwake_notify(t); /* TIF_SIGPENDING must be set first. */ =20 /* * For SIGKILL, we want to wake it up in the stopped/traced case. Index: linux-2.6.23/kernel/time/clockevents.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/kernel/time/clockevents.c +++ linux-2.6.23/kernel/time/clockevents.c @@ -84,6 +84,7 @@ int clockevents_program_event(struct clo return -ETIME; =20 dev->next_event =3D expires; + dev->delta =3D delta; =20 if (dev->mode =3D=3D CLOCK_EVT_MODE_SHUTDOWN) return 0; Index: linux-2.6.23/lib/Kconfig.debug =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/lib/Kconfig.debug +++ linux-2.6.23/lib/Kconfig.debug @@ -71,6 +71,8 @@ config HEADERS_CHECK exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in your build tree), to make sure they're suitable. 
=20 +source "kernel/ipipe/Kconfig.debug" + config DEBUG_KERNEL bool "Kernel debugging" help Index: linux-2.6.23/lib/bust_spinlocks.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/lib/bust_spinlocks.c +++ linux-2.6.23/lib/bust_spinlocks.c @@ -12,16 +12,19 @@ #include #include #include +#include =20 =20 void __attribute__((weak)) bust_spinlocks(int yes) { if (yes) { + ipipe_trace_panic_freeze(); oops_in_progress =3D 1; } else { #ifdef CONFIG_VT unblank_screen(); #endif + ipipe_trace_panic_dump(); oops_in_progress =3D 0; wake_up_klogd(); } Index: linux-2.6.23/lib/ioremap.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/lib/ioremap.c +++ linux-2.6.23/lib/ioremap.c @@ -84,8 +84,8 @@ int ioremap_page_range(unsigned long add if (err) break; } while (pgd++, addr =3D next, addr !=3D end); - - flush_cache_vmap(start, end); + __ipipe_pin_range_globally(start, end); + flush_cache_vmap(start, end); =20 return err; } Index: linux-2.6.23/lib/smp_processor_id.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/lib/smp_processor_id.c +++ linux-2.6.23/lib/smp_processor_id.c @@ -13,6 +13,9 @@ unsigned int debug_smp_processor_id(void int this_cpu =3D raw_smp_processor_id(); cpumask_t this_mask; =20 + if (!ipipe_root_domain_p) + goto out; + if (likely(preempt_count)) goto out; =20 Index: linux-2.6.23/lib/spinlock_debug.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= 
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/lib/spinlock_debug.c +++ linux-2.6.23/lib/spinlock_debug.c @@ -133,6 +133,8 @@ void _raw_spin_lock(spinlock_t *lock) debug_spin_lock_after(lock); } =20 +EXPORT_SYMBOL(_raw_spin_lock); + int _raw_spin_trylock(spinlock_t *lock) { int ret =3D __raw_spin_trylock(&lock->raw_lock); @@ -148,12 +150,16 @@ int _raw_spin_trylock(spinlock_t *lock) return ret; } =20 +EXPORT_SYMBOL(_raw_spin_trylock); + void _raw_spin_unlock(spinlock_t *lock) { debug_spin_unlock(lock); __raw_spin_unlock(&lock->raw_lock); } =20 +EXPORT_SYMBOL(_raw_spin_unlock); + static void rwlock_bug(rwlock_t *lock, const char *msg) { if (!debug_locks_off()) @@ -199,6 +205,8 @@ void _raw_read_lock(rwlock_t *lock) __raw_read_lock(&lock->raw_lock); } =20 +EXPORT_SYMBOL(_raw_read_lock); + int _raw_read_trylock(rwlock_t *lock) { int ret =3D __raw_read_trylock(&lock->raw_lock); @@ -212,12 +220,16 @@ int _raw_read_trylock(rwlock_t *lock) return ret; } =20 +EXPORT_SYMBOL(_raw_read_trylock); + void _raw_read_unlock(rwlock_t *lock) { RWLOCK_BUG_ON(lock->magic !=3D RWLOCK_MAGIC, lock, "bad magic"); __raw_read_unlock(&lock->raw_lock); } =20 +EXPORT_SYMBOL(_raw_read_unlock); + static inline void debug_write_lock_before(rwlock_t *lock) { RWLOCK_BUG_ON(lock->magic !=3D RWLOCK_MAGIC, lock, "bad magic"); @@ -275,6 +287,8 @@ void _raw_write_lock(rwlock_t *lock) debug_write_lock_after(lock); } =20 +EXPORT_SYMBOL(_raw_write_lock); + int _raw_write_trylock(rwlock_t *lock) { int ret =3D __raw_write_trylock(&lock->raw_lock); @@ -290,8 +304,12 @@ int _raw_write_trylock(rwlock_t *lock) return ret; } =20 +EXPORT_SYMBOL(_raw_write_trylock); + void _raw_write_unlock(rwlock_t *lock) { debug_write_unlock(lock); __raw_write_unlock(&lock->raw_lock); } + +EXPORT_SYMBOL(_raw_write_unlock); Index: linux-2.6.23/mm/memory.c 
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/mm/memory.c +++ linux-2.6.23/mm/memory.c @@ -50,6 +50,7 @@ #include #include #include +#include =20 #include #include @@ -416,6 +417,34 @@ struct page *vm_normal_page(struct vm_ar return pfn_to_page(pfn); } =20 +static inline void cow_user_page(struct page *dst, struct page *src, uns= igned long va, struct vm_area_struct *vma) +{ + /* + * If the source page was a PFN mapping, we don't have + * a "struct page" for it. We do a best-effort copy by + * just copying from the original user address. If that + * fails, we just zero-fill it. Live with it. + */ + if (unlikely(!src)) { + void *kaddr =3D kmap_atomic(dst, KM_USER0); + void __user *uaddr =3D (void __user *)(va & PAGE_MASK); + + /* + * This really shouldn't fail, because the page is there + * in the page tables. But it might just be unreadable, + * in which case we just give up and fill the result with + * zeroes. + */ + if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) + memset(kaddr, 0, PAGE_SIZE); + kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(dst); + return; + =09 + } + copy_user_highpage(dst, src, va, vma); +} + /* * copy one vm_area from one task to the other. 
Assumes the page tables * already present in the new task to be cleared in the whole range @@ -424,8 +453,8 @@ struct page *vm_normal_page(struct vm_ar =20 static inline void copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, - pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, - unsigned long addr, int *rss) + pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, + unsigned long addr, int *rss, struct page *uncow_page) { unsigned long vm_flags =3D vma->vm_flags; pte_t pte =3D *src_pte; @@ -464,6 +493,21 @@ copy_one_pte(struct mm_struct *dst_mm, s * in the parent and the child */ if (is_cow_mapping(vm_flags)) { +#ifdef CONFIG_IPIPE + if (uncow_page) { + struct page *old_page =3D vm_normal_page(vma, addr, pte); + cow_user_page(uncow_page, old_page, addr, vma); + pte =3D mk_pte(uncow_page, vma->vm_page_prot); + + if (vm_flags & VM_SHARED) + pte =3D pte_mkclean(pte); + pte =3D pte_mkold(pte); + + page_dup_rmap(uncow_page, vma, addr); + rss[!!PageAnon(uncow_page)]++; + goto out_set_pte; + } +#endif /* CONFIG_IPIPE */ ptep_set_wrprotect(src_mm, addr, src_pte); pte =3D pte_wrprotect(pte); } @@ -494,13 +538,27 @@ static int copy_pte_range(struct mm_stru pte_t *src_pte, *dst_pte; spinlock_t *src_ptl, *dst_ptl; int progress =3D 0; + struct page *uncow_page =3D NULL; int rss[2]; - +#ifdef CONFIG_IPIPE + int do_cow_break =3D 0; +again: + if (do_cow_break) { + uncow_page =3D alloc_page_vma(GFP_HIGHUSER, vma, addr); + if (!uncow_page) + return -ENOMEM; + do_cow_break =3D 0; + } +#else again: +#endif rss[1] =3D rss[0] =3D 0; dst_pte =3D pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); - if (!dst_pte) + if (!dst_pte) { + if (uncow_page) + page_cache_release(uncow_page); return -ENOMEM; + } src_pte =3D pte_offset_map_nested(src_pmd, addr); src_ptl =3D pte_lockptr(src_mm, src_pmd); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); @@ -522,7 +580,20 @@ again: progress++; continue; } - copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss); 
+#ifdef CONFIG_IPIPE + if (likely(uncow_page =3D=3D NULL) && likely(pte_present(*src_pte))) {= + if (is_cow_mapping(vma->vm_flags)) { + if (((vma->vm_flags|src_mm->def_flags) & (VM_LOCKED|VM_PINNED)) + =3D=3D (VM_LOCKED|VM_PINNED)) { + do_cow_break =3D 1; + break; + } + } + } +#endif + copy_one_pte(dst_mm, src_mm, dst_pte, + src_pte, vma, addr, rss, uncow_page); + uncow_page =3D NULL; progress +=3D 8; } while (dst_pte++, src_pte++, addr +=3D PAGE_SIZE, addr !=3D end); =20 @@ -1586,34 +1657,6 @@ static inline pte_t maybe_mkwrite(pte_t=20 return pte; } =20 -static inline void cow_user_page(struct page *dst, struct page *src, uns= igned long va, struct vm_area_struct *vma) -{ - /* - * If the source page was a PFN mapping, we don't have - * a "struct page" for it. We do a best-effort copy by - * just copying from the original user address. If that - * fails, we just zero-fill it. Live with it. - */ - if (unlikely(!src)) { - void *kaddr =3D kmap_atomic(dst, KM_USER0); - void __user *uaddr =3D (void __user *)(va & PAGE_MASK); - - /* - * This really shouldn't fail, because the page is there - * in the page tables. But it might just be unreadable, - * in which case we just give up and fill the result with - * zeroes. - */ - if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) - memset(kaddr, 0, PAGE_SIZE); - kunmap_atomic(kaddr, KM_USER0); - flush_dcache_page(dst); - return; - - } - copy_user_highpage(dst, src, va, vma); -} - /* * This routine handles present pages, when users try to write * to a shared page. 
It is done by copying the page to a new address @@ -2867,3 +2910,104 @@ int access_process_vm(struct task_struct return buf - old_buf; } EXPORT_SYMBOL_GPL(access_process_vm); + +#ifdef CONFIG_IPIPE + +static inline int ipipe_pin_pte_range(struct mm_struct *mm, pmd_t *pmd, + struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + spinlock_t *ptl; + pte_t *pte; + + do { + pte =3D pte_offset_map_lock(mm, pmd, addr, &ptl); + if (!pte) + continue; + + if (!pte_present(*pte)) { + pte_unmap_unlock(pte, ptl); + continue; + } + + if (do_wp_page(mm, vma, addr, pte, pmd, ptl, *pte) =3D=3D VM_FAULT_OOM= ) + return -ENOMEM; + } while (addr +=3D PAGE_SIZE, addr !=3D end); + return 0; +} + +static inline int ipipe_pin_pmd_range(struct mm_struct *mm, pud_t *pud, + struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pmd_t *pmd; + + pmd =3D pmd_offset(pud, addr); + do { + next =3D pmd_addr_end(addr, end); + if (ipipe_pin_pte_range(mm, pmd, vma, addr, end)) + return -ENOMEM; + } while (pmd++, addr =3D next, addr !=3D end); + return 0; +} + +static inline int ipipe_pin_pud_range(struct mm_struct *mm, pgd_t *pgd, + struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + unsigned long next; + pud_t *pud; + + pud =3D pud_offset(pgd, addr); + do { + next =3D pud_addr_end(addr, end); + if (ipipe_pin_pmd_range(mm, pud, vma, addr, end)) + return -ENOMEM; + } while (pud++, addr =3D next, addr !=3D end); + return 0; +} + +int ipipe_disable_ondemand_mappings(struct task_struct *tsk) +{ + unsigned long addr, next, end; + struct vm_area_struct *vma; + struct mm_struct *mm; + int result =3D 0; + pgd_t *pgd; + + mm =3D get_task_mm(tsk); + if (!mm) + return -EPERM; + + down_write(&mm->mmap_sem); + if (mm->def_flags & VM_PINNED) + goto done_mm; + + for (vma =3D mm->mmap; vma; vma =3D vma->vm_next) { + if (!is_cow_mapping(vma->vm_flags)) + continue; + + addr =3D vma->vm_start; + end =3D vma->vm_end; + + pgd =3D 
pgd_offset(mm, addr); + do { + next =3D pgd_addr_end(addr, end); + if (ipipe_pin_pud_range(mm, pgd, vma, addr, next)) { + result =3D -ENOMEM; + goto done_mm; + } + } while (pgd++, addr =3D next, addr !=3D end); + } + mm->def_flags |=3D VM_PINNED; + + done_mm: + up_write(&mm->mmap_sem); + mmput(mm); + return result; +} + +EXPORT_SYMBOL(ipipe_disable_ondemand_mappings); + +#endif Index: linux-2.6.23/mm/mlock.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/mm/mlock.c +++ linux-2.6.23/mm/mlock.c @@ -173,10 +173,10 @@ asmlinkage long sys_munlock(unsigned lon static int do_mlockall(int flags) { struct vm_area_struct * vma, * prev =3D NULL; - unsigned int def_flags =3D 0; + unsigned int def_flags =3D current->mm->def_flags & VM_PINNED; =20 if (flags & MCL_FUTURE) - def_flags =3D VM_LOCKED; + def_flags |=3D VM_LOCKED; current->mm->def_flags =3D def_flags; if (flags =3D=3D MCL_FUTURE) goto out; Index: linux-2.6.23/mm/vmalloc.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/mm/vmalloc.c +++ linux-2.6.23/mm/vmalloc.c @@ -161,6 +161,7 @@ int map_vm_area(struct vm_struct *area,=20 if (err) break; } while (pgd++, addr =3D next, addr !=3D end); + __ipipe_pin_range_globally((unsigned long) area->addr, end); flush_cache_vmap((unsigned long) area->addr, end); return err; } Index: linux-2.6.23/kernel/spinlock.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/kernel/spinlock.c +++ linux-2.6.23/kernel/spinlock.c @@ -88,7 +88,7 @@ unsigned long 
__lockfunc _spin_lock_irqs * _raw_spin_lock_flags() code, because lockdep assumes * that interrupts are not re-enabled during lock-acquire: */ -#ifdef CONFIG_LOCKDEP +#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE) LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); #else _raw_spin_lock_flags(lock, &flags); @@ -305,7 +305,7 @@ unsigned long __lockfunc _spin_lock_irqs * _raw_spin_lock_flags() code, because lockdep assumes * that interrupts are not re-enabled during lock-acquire: */ -#ifdef CONFIG_LOCKDEP +#if defined(CONFIG_LOCKDEP) || defined(CONFIG_IPIPE) LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); #else _raw_spin_lock_flags(lock, &flags); Index: linux-2.6.23/arch/i386/boot/compressed/Makefile =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- linux-2.6.23.orig/arch/i386/boot/compressed/Makefile +++ linux-2.6.23/arch/i386/boot/compressed/Makefile @@ -12,6 +12,7 @@ LDFLAGS_vmlinux :=3D -T hostprogs-y :=3D relocs =20 CFLAGS :=3D -m32 -D__KERNEL__ $(LINUX_INCLUDE) -O2 \ + -Iinclude/asm-i386/mach-default \ -fno-strict-aliasing -fPIC \ $(call cc-option,-ffreestanding) \ $(call cc-option,-fno-stack-protector) --------------050602070300060801070403-- --------------enig05F65E2EA842680795110C65 Content-Type: application/pgp-signature; name="signature.asc" Content-Description: OpenPGP digital signature Content-Disposition: attachment; filename="signature.asc" -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org iD8DBQFHEIuCniDOoMHTA+kRApaJAJ9njkDapUqJIzGACEa2xellsNUAugCfbdbA XXTiFsa5As2BPJoh4TeYGUA= =4/hQ -----END PGP SIGNATURE----- --------------enig05F65E2EA842680795110C65--