From mboxrd@z Thu Jan 1 00:00:00 1970 Message-ID: <45655211.6070007@domain.hid> Date: Thu, 23 Nov 2006 08:47:29 +0100 From: Jan Kiszka MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------020001080902050609010902" Sender: jan.kiszka@domain.hid Subject: [Xenomai-core] [PATCH] rebased 2.6.16 I-pipe for i386 List-Id: "Xenomai life and development \(bug reports, patches, discussions\)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: adeos-main , xenomai-core This is a multi-part message in MIME format. --------------020001080902050609010902 Content-Type: text/plain; charset=ISO-8859-15 Content-Transfer-Encoding: 7bit Hi, as a by-product of some other hack, here comes the simple rebase of latest i386-1.5-02 I-pipe patch over 2.6.16.33. Might be interesting for those users who plan to follow Adrian Bunk's stable 2.6.16 series. Jan --------------020001080902050609010902 Content-Type: text/plain; name="adeos-ipipe-2.6.16-i386-1.5-02.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="adeos-ipipe-2.6.16-i386-1.5-02.patch" Index: linux-2.6.16.33/Makefile =================================================================== --- linux-2.6.16.33.orig/Makefile +++ linux-2.6.16.33/Makefile @@ -511,6 +511,10 @@ CFLAGS += $(call add-align,CONFIG_CC_AL CFLAGS += $(call add-align,CONFIG_CC_ALIGN_LOOPS,-loops) CFLAGS += $(call add-align,CONFIG_CC_ALIGN_JUMPS,-jumps) +ifdef CONFIG_IPIPE_TRACE_MCOUNT +CFLAGS += -pg +endif + ifdef CONFIG_FRAME_POINTER CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) else Index: linux-2.6.16.33/arch/i386/Kconfig =================================================================== --- linux-2.6.16.33.orig/arch/i386/Kconfig +++ linux-2.6.16.33/arch/i386/Kconfig @@ -228,6 +228,8 @@ config SCHED_SMT source "kernel/Kconfig.preempt" +source "kernel/ipipe/Kconfig" + config X86_UP_APIC bool "Local APIC support on uniprocessors" depends on !SMP && !(X86_VISWS || 
X86_VOYAGER) Index: linux-2.6.16.33/arch/i386/boot/compressed/misc.c =================================================================== --- linux-2.6.16.33.orig/arch/i386/boot/compressed/misc.c +++ linux-2.6.16.33/arch/i386/boot/compressed/misc.c @@ -15,6 +15,12 @@ #include #include +#ifdef CONFIG_IPIPE_TRACE_MCOUNT +void __attribute__ ((no_instrument_function)) mcount(void) +{ +} +#endif + /* * gzip declarations */ @@ -112,7 +118,7 @@ static long free_mem_end_ptr; #define INPLACE_MOVE_ROUTINE 0x1000 #define LOW_BUFFER_START 0x2000 #define LOW_BUFFER_MAX 0x90000 -#define HEAP_SIZE 0x3000 +#define HEAP_SIZE 0x4000 static unsigned int low_buffer_end, low_buffer_size; static int high_loaded =0; static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/; Index: linux-2.6.16.33/arch/i386/kernel/Makefile =================================================================== --- linux-2.6.16.33.orig/arch/i386/kernel/Makefile +++ linux-2.6.16.33/arch/i386/kernel/Makefile @@ -31,6 +31,8 @@ obj-$(CONFIG_X86_SUMMIT_NUMA) += summit. 
obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_MODULES) += module.o obj-y += sysenter.o vsyscall.o +obj-$(CONFIG_IPIPE) += ipipe-core.o ipipe-root.o +obj-$(CONFIG_IPIPE_TRACE_MCOUNT) += ipipe-mcount.o obj-$(CONFIG_ACPI_SRAT) += srat.o obj-$(CONFIG_HPET_TIMER) += time_hpet.o obj-$(CONFIG_EFI) += efi.o efi_stub.o Index: linux-2.6.16.33/arch/i386/kernel/apic.c =================================================================== --- linux-2.6.16.33.orig/arch/i386/kernel/apic.c +++ linux-2.6.16.33/arch/i386/kernel/apic.c @@ -78,7 +78,7 @@ void ack_bad_irq(unsigned int irq) * But only ack when the APIC is enabled -AK */ if (cpu_has_apic) - ack_APIC_irq(); + __ack_APIC_irq(); } void __init apic_intr_init(void) @@ -1177,6 +1177,9 @@ inline void smp_local_timer_interrupt(st fastcall void smp_apic_timer_interrupt(struct pt_regs *regs) { int cpu = smp_processor_id(); +#ifdef CONFIG_IPIPE + regs = __ipipe_tick_regs + cpu; +#endif /* CONFIG_IPIPE */ /* * the NMI deadlock-detector uses this. @@ -1250,7 +1253,7 @@ fastcall void smp_spurious_interrupt(str */ v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) - ack_APIC_irq(); + __ack_APIC_irq(); /* see sw-dev-man vol 3, chapter 7.4.13.5 */ printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n", Index: linux-2.6.16.33/arch/i386/kernel/entry.S =================================================================== --- linux-2.6.16.33.orig/arch/i386/kernel/entry.S +++ linux-2.6.16.33/arch/i386/kernel/entry.S @@ -48,6 +48,7 @@ #include #include #include +#include #include "irq_vectors.h" #define nr_syscalls ((syscall_table_size)/4) @@ -75,11 +76,81 @@ DF_MASK = 0x00000400 NT_MASK = 0x00004000 VM_MASK = 0x00020000 +#ifdef CONFIG_IPIPE +#define CLI call __ipipe_stall_root ; sti +#define STI call __ipipe_unstall_root +#define STI_COND_HW sti +#define EMULATE_ROOT_IRET(bypass) \ + call __ipipe_unstall_iret_root ; \ + bypass: \ + movl EAX(%esp),%eax +#define 
TEST_PREEMPTIBLE(regs) call __ipipe_kpreempt_root ; testl %eax,%eax +#define restore_nocheck unstall_and_restore_nocheck +#define restore_nmi restore_raw +#define CATCH_ROOT_SYSCALL(bypass1,bypass2) \ + call __ipipe_syscall_root ; \ + testl %eax,%eax ; \ + js bypass1 ; \ + jne bypass2 ; \ + movl ORIG_EAX(%esp),%eax +#define PUSH_XCODE(v) pushl $ ex_/**/v +#define HANDLE_EXCEPTION(code) movl %code,%ecx ; \ + call __ipipe_handle_exception ; \ + testl %eax,%eax ; \ + jnz restore_raw +#define DIVERT_EXCEPTION(code) movl $(__USER_DS), %ecx ; \ + movl %ecx, %ds ; \ + movl %ecx, %es ; \ + movl %esp, %eax ; \ + movl $ex_/**/code,%edx ; \ + call __ipipe_divert_exception ; \ + testl %eax,%eax ; \ + jnz restore_raw + +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF +# ifdef CONFIG_REGPARM +# define LOAD_ARG +# define REMOVE_ARG +# else /* !CONFIG_REGPARM */ +# define LOAD_ARG pushl %eax +# define REMOVE_ARG addl $4, %esp +# endif /* CONFIG_REGPARM */ +# define IPIPE_TRACE_IRQ_ENTER \ + lea EIP-4(%esp), %ebp; \ + movl ORIG_EAX(%esp), %eax; \ + LOAD_ARG; \ + call ipipe_trace_begin; \ + REMOVE_ARG +# define IPIPE_TRACE_IRQ_EXIT \ + pushl %eax; \ + movl ORIG_EAX+4(%esp), %eax; \ + LOAD_ARG; \ + call ipipe_trace_end; \ + REMOVE_ARG; \ + popl %eax +#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */ +# define IPIPE_TRACE_IRQ_ENTER +# define IPIPE_TRACE_IRQ_EXIT +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ + +#else /* !CONFIG_IPIPE */ +#define CLI cli +#define STI sti +#define STI_COND_HW +#define EMULATE_ROOT_IRET(bypass) +#define TEST_PREEMPTIBLE(regs) testl $IF_MASK,EFLAGS(regs) +#define restore_nmi restore_all +#define CATCH_ROOT_SYSCALL(bypass1,bypass2) +#define PUSH_XCODE(v) pushl $v +#define HANDLE_EXCEPTION(code) call *%code +#define DIVERT_EXCEPTION(code) +#endif /* CONFIG_IPIPE */ + #ifdef CONFIG_PREEMPT -#define preempt_stop cli +#define preempt_stop CLI #else #define preempt_stop -#define resume_kernel restore_nocheck +#define resume_kernel restore_nocheck #endif #define SAVE_ALL \ @@ -124,6 +195,7 
@@ VM_MASK = 0x00020000 ENTRY(ret_from_fork) + STI_COND_HW pushl %eax call schedule_tail GET_THREAD_INFO(%ebp) @@ -141,14 +213,14 @@ ENTRY(ret_from_fork) ALIGN ret_from_exception: preempt_stop -ret_from_intr: +ENTRY(ret_from_intr) GET_THREAD_INFO(%ebp) movl EFLAGS(%esp), %eax # mix EFLAGS and CS movb CS(%esp), %al testl $(VM_MASK | 3), %eax jz resume_kernel ENTRY(resume_userspace) - cli # make sure we don't miss an interrupt + CLI # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx @@ -166,7 +238,7 @@ need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl jz restore_all - testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ? + TEST_PREEMPTIBLE(%esp) # interrupts off (exception path) ? jz restore_all call preempt_schedule_irq jmp need_resched @@ -201,6 +273,7 @@ sysenter_past_esp: pushl %eax SAVE_ALL GET_THREAD_INFO(%ebp) + CATCH_ROOT_SYSCALL(sysenter_tail,sysenter_exit) /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) @@ -209,11 +282,13 @@ sysenter_past_esp: jae syscall_badsys call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) - cli +sysenter_tail: + CLI movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work /* if something modifies registers it must also disable sysexit */ + EMULATE_ROOT_IRET(sysenter_exit) movl EIP(%esp), %edx movl OLDESP(%esp), %ecx xorl %ebp,%ebp @@ -226,6 +301,7 @@ ENTRY(system_call) pushl %eax # save orig_eax SAVE_ALL GET_THREAD_INFO(%ebp) + CATCH_ROOT_SYSCALL(syscall_exit,restore_raw) # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) @@ -236,7 +312,7 @@ syscall_call: call *sys_call_table(,%eax,4) movl 
%eax,EAX(%esp) # store the return value syscall_exit: - cli # make sure we don't miss an interrupt + CLI # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx @@ -253,7 +329,15 @@ restore_all: andl $(VM_MASK | (4 << 8) | 3), %eax cmpl $((4 << 8) | 3), %eax je ldt_ss # returning to user-space with LDT SS +#ifdef CONFIG_IPIPE +unstall_and_restore_nocheck: + call __ipipe_unstall_iret_root +restore_raw: + # FIXME: we need to check for a return to + # user-space on a 16bit stack even in the NMI case +#else /* !CONFIG_IPIPE */ restore_nocheck: +#endif /* CONFIG_IPIPE */ RESTORE_REGS addl $4, %esp 1: iret @@ -261,7 +345,7 @@ restore_nocheck: iret_exc: sti pushl $0 # no error code - pushl $do_iret_error + PUSH_XCODE(do_iret_error) jmp error_code .previous .section __ex_table,"a" @@ -300,8 +384,9 @@ work_pending: testb $_TIF_NEED_RESCHED, %cl jz work_notifysig work_resched: + STI_COND_HW call schedule - cli # make sure we don't miss an interrupt + CLI # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret movl TI_flags(%ebp), %ecx @@ -353,7 +438,7 @@ syscall_trace_entry: syscall_exit_work: testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl jz work_pending - sti # could let do_syscall_trace() call + STI # could let do_syscall_trace() call # schedule() instead movl %esp, %eax movl $1, %edx @@ -404,7 +489,7 @@ ENTRY(interrupt) vector=0 ENTRY(irq_entries_start) -.rept NR_IRQS +.rept NR_XIRQS ALIGN 1: pushl $vector-256 jmp common_interrupt @@ -414,6 +499,32 @@ ENTRY(irq_entries_start) vector=vector+1 .endr +#ifdef CONFIG_IPIPE + ALIGN +common_interrupt: + SAVE_ALL + IPIPE_TRACE_IRQ_ENTER; \ + call __ipipe_handle_irq + IPIPE_TRACE_IRQ_EXIT; \ + testl %eax,%eax + jnz ret_from_intr + RESTORE_REGS + addl $4, %esp + iret + +#define BUILD_INTERRUPT(name, nr) \ +ENTRY(name) \ + pushl $nr-288; /* nr - (256 + FIRST_EXTERNAL_VECTOR) */ \ + 
SAVE_ALL; \ + IPIPE_TRACE_IRQ_ENTER; \ + call __ipipe_handle_irq; \ + IPIPE_TRACE_IRQ_EXIT; \ + testl %eax,%eax; \ + jnz ret_from_intr; \ + RESTORE_REGS; \ + addl $4, %esp; \ + iret +#else /* CONFIG_IPIPE */ ALIGN common_interrupt: SAVE_ALL @@ -428,13 +539,14 @@ ENTRY(name) \ movl %esp,%eax; \ call smp_/**/name; \ jmp ret_from_intr; +#endif /* CONFIG_IPIPE */ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" ENTRY(divide_error) pushl $0 # no error code - pushl $do_divide_error + PUSH_XCODE(do_divide_error) ALIGN error_code: pushl %ds @@ -459,22 +571,23 @@ error_code: movl %ecx, %ds movl %ecx, %es movl %esp,%eax # pt_regs pointer - call *%edi + HANDLE_EXCEPTION(edi) jmp ret_from_exception ENTRY(coprocessor_error) pushl $0 - pushl $do_coprocessor_error + PUSH_XCODE(do_coprocessor_error) jmp error_code ENTRY(simd_coprocessor_error) pushl $0 - pushl $do_simd_coprocessor_error + PUSH_XCODE(do_simd_coprocessor_error) jmp error_code ENTRY(device_not_available) pushl $-1 # mark this as an int SAVE_ALL + DIVERT_EXCEPTION(device_not_available) movl %cr0, %eax testl $0x4, %eax # EM (math emulation bit) jne device_not_available_emulate @@ -516,6 +629,7 @@ KPROBE_ENTRY(debug) debug_stack_correct: pushl $-1 # mark this as an int SAVE_ALL + DIVERT_EXCEPTION(do_debug) xorl %edx,%edx # error code 0 movl %esp,%eax # pt_regs pointer call do_debug @@ -554,7 +668,7 @@ nmi_stack_correct: xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi - jmp restore_all + jmp restore_nmi nmi_stack_fixup: FIX_STACK(12,nmi_stack_correct, 1) @@ -595,6 +709,7 @@ nmi_16bit_stack: KPROBE_ENTRY(int3) pushl $-1 # mark this as an int SAVE_ALL + DIVERT_EXCEPTION(do_int3) xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_int3 @@ -603,60 +718,60 @@ KPROBE_ENTRY(int3) ENTRY(overflow) pushl $0 - pushl $do_overflow + PUSH_XCODE(do_overflow) jmp error_code ENTRY(bounds) pushl $0 - pushl $do_bounds + PUSH_XCODE(do_bounds) jmp 
error_code ENTRY(invalid_op) pushl $0 - pushl $do_invalid_op + PUSH_XCODE(do_invalid_op) jmp error_code ENTRY(coprocessor_segment_overrun) pushl $0 - pushl $do_coprocessor_segment_overrun + PUSH_XCODE(do_coprocessor_segment_overrun) jmp error_code ENTRY(invalid_TSS) - pushl $do_invalid_TSS + PUSH_XCODE(do_invalid_TSS) jmp error_code ENTRY(segment_not_present) - pushl $do_segment_not_present + PUSH_XCODE(do_segment_not_present) jmp error_code ENTRY(stack_segment) - pushl $do_stack_segment + PUSH_XCODE(do_stack_segment) jmp error_code KPROBE_ENTRY(general_protection) - pushl $do_general_protection + PUSH_XCODE(do_general_protection) jmp error_code .previous .text ENTRY(alignment_check) - pushl $do_alignment_check + PUSH_XCODE(do_alignment_check) jmp error_code KPROBE_ENTRY(page_fault) - pushl $do_page_fault + PUSH_XCODE(do_page_fault) jmp error_code .previous .text #ifdef CONFIG_X86_MCE ENTRY(machine_check) pushl $0 - pushl machine_check_vector + PUSH_XCODE(machine_check_vector) jmp error_code #endif ENTRY(spurious_interrupt_bug) pushl $0 - pushl $do_spurious_interrupt_bug + PUSH_XCODE(do_spurious_interrupt_bug) jmp error_code .section .rodata,"a" Index: linux-2.6.16.33/arch/i386/kernel/i8259.c =================================================================== --- linux-2.6.16.33.orig/arch/i386/kernel/i8259.c +++ linux-2.6.16.33/arch/i386/kernel/i8259.c @@ -40,7 +40,7 @@ DEFINE_SPINLOCK(i8259A_lock); static void end_8259A_irq (unsigned int irq) { if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)) && - irq_desc[irq].action) + irq_desc[irq].action) enable_8259A_irq(irq); } @@ -89,13 +89,14 @@ void disable_8259A_irq(unsigned int irq) unsigned int mask = 1 << irq; unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + spin_lock_irqsave_hw(&i8259A_lock, flags); + ipipe_irq_lock(irq); cached_irq_mask |= mask; if (irq & 8) outb(cached_slave_mask, PIC_SLAVE_IMR); else outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); 
+ spin_unlock_irqrestore_hw(&i8259A_lock, flags); } void enable_8259A_irq(unsigned int irq) @@ -103,13 +104,14 @@ void enable_8259A_irq(unsigned int irq) unsigned int mask = ~(1 << irq); unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + spin_lock_irqsave_hw(&i8259A_lock, flags); cached_irq_mask &= mask; if (irq & 8) outb(cached_slave_mask, PIC_SLAVE_IMR); else outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); + ipipe_irq_unlock(irq); + spin_unlock_irqrestore_hw(&i8259A_lock, flags); } int i8259A_irq_pending(unsigned int irq) @@ -118,12 +120,12 @@ int i8259A_irq_pending(unsigned int irq) unsigned long flags; int ret; - spin_lock_irqsave(&i8259A_lock, flags); + spin_lock_irqsave_hw(&i8259A_lock, flags); if (irq < 8) ret = inb(PIC_MASTER_CMD) & mask; else ret = inb(PIC_SLAVE_CMD) & (mask >> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); + spin_unlock_irqrestore_hw(&i8259A_lock, flags); return ret; } @@ -170,7 +172,7 @@ static void mask_and_ack_8259A(unsigned unsigned int irqmask = 1 << irq; unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + spin_lock_irqsave_hw(&i8259A_lock, flags); /* * Lightweight spurious IRQ detection. 
We do not want * to overdo spurious IRQ handling - it's usually a sign @@ -188,6 +190,15 @@ static void mask_and_ack_8259A(unsigned */ if (cached_irq_mask & irqmask) goto spurious_8259A_irq; +#ifdef CONFIG_IPIPE + if (irq == 0) { + /* Fast timer ack -- don't mask (unless supposedly + spurious) */ + outb(0x20,PIC_MASTER_CMD); + spin_unlock_irqrestore_hw(&i8259A_lock, flags); + return; + } +#endif /* CONFIG_IPIPE */ cached_irq_mask |= irqmask; handle_real_irq: @@ -201,7 +212,7 @@ handle_real_irq: outb(cached_master_mask, PIC_MASTER_IMR); outb(0x60+irq,PIC_MASTER_CMD); /* 'Specific EOI to master */ } - spin_unlock_irqrestore(&i8259A_lock, flags); + spin_unlock_irqrestore_hw(&i8259A_lock, flags); return; spurious_8259A_irq: @@ -302,7 +313,7 @@ void init_8259A(int auto_eoi) { unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + spin_lock_irqsave_hw(&i8259A_lock, flags); outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ @@ -336,7 +347,7 @@ void init_8259A(int auto_eoi) outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - spin_unlock_irqrestore(&i8259A_lock, flags); + spin_unlock_irqrestore_hw(&i8259A_lock, flags); } /* @@ -410,7 +421,7 @@ void __init init_IRQ(void) */ for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { int vector = FIRST_EXTERNAL_VECTOR + i; - if (i >= NR_IRQS) + if (i >= NR_XIRQS) break; if (vector != SYSCALL_VECTOR) set_intr_gate(vector, interrupt[i]); Index: linux-2.6.16.33/arch/i386/kernel/io_apic.c =================================================================== --- linux-2.6.16.33.orig/arch/i386/kernel/io_apic.c +++ linux-2.6.16.33/arch/i386/kernel/io_apic.c @@ -182,18 +182,20 @@ static void mask_IO_APIC_irq (unsigned i { unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); + ipipe_irq_lock(irq); __mask_IO_APIC_irq(irq); - 
spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); } static void unmask_IO_APIC_irq (unsigned int irq) { unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); + ipipe_irq_unlock(irq); __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); } static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) @@ -202,10 +204,10 @@ static void clear_IO_APIC_pin(unsigned i unsigned long flags; /* Check delivery_mode to be sure we're not clearing an SMI pin */ - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); if (entry.delivery_mode == dest_SMI) return; @@ -214,10 +216,10 @@ static void clear_IO_APIC_pin(unsigned i */ memset(&entry, 0, sizeof(entry)); entry.mask = 1; - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); } static void clear_IO_APIC (void) @@ -247,7 +249,7 @@ static void set_ioapic_affinity_irq(unsi apicid_value = cpu_mask_to_apicid(cpumask); /* Prepare to do the io_apic_write */ apicid_value = apicid_value << 24; - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); for (;;) { pin = entry->pin; if (pin == -1) @@ -258,7 +260,7 @@ static void set_ioapic_affinity_irq(unsi entry = irq_2_pin + entry->next; } set_irq_info(irq, cpumask); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); } #if defined(CONFIG_IRQBALANCE) @@ 
-1270,11 +1272,11 @@ static void __init setup_IO_APIC_irqs(vo if (!apic && (irq < 16)) disable_8259A_irq(irq); } - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); set_native_irq_info(irq, TARGET_CPUS); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); } } @@ -1318,10 +1320,10 @@ static void __init setup_ExtINT_IRQ0_pin /* * Add it to the IO-APIC irq-routing table: */ - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); enable_8259A_irq(0); } @@ -1355,14 +1357,14 @@ void __init print_IO_APIC(void) for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); reg_01.raw = io_apic_read(apic, 1); if (reg_01.bits.version >= 0x10) reg_02.raw = io_apic_read(apic, 2); if (reg_01.bits.version >= 0x20) reg_03.raw = io_apic_read(apic, 3); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); printk(KERN_DEBUG ".... 
register #00: %08X\n", reg_00.raw); @@ -1431,10 +1433,10 @@ void __init print_IO_APIC(void) for (i = 0; i <= reg_01.bits.entries; i++) { struct IO_APIC_route_entry entry; - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); printk(KERN_DEBUG " %02x %03X %02X ", i, @@ -1600,7 +1602,7 @@ void /*__init*/ print_PIC(void) printk(KERN_DEBUG "\nprinting PIC contents\n"); - spin_lock_irqsave(&i8259A_lock, flags); + spin_lock_irqsave_hw(&i8259A_lock, flags); v = inb(0xa1) << 8 | inb(0x21); printk(KERN_DEBUG "... PIC IMR: %04x\n", v); @@ -1614,7 +1616,7 @@ void /*__init*/ print_PIC(void) outb(0x0a,0xa0); outb(0x0a,0x20); - spin_unlock_irqrestore(&i8259A_lock, flags); + spin_unlock_irqrestore_hw(&i8259A_lock, flags); printk(KERN_DEBUG "... PIC ISR: %04x\n", v); @@ -1643,9 +1645,9 @@ static void __init enable_IO_APIC(void) * The number of IO-APIC IRQ registers (== #pins): */ for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); nr_ioapic_registers[apic] = reg_01.bits.entries+1; } for(apic = 0; apic < nr_ioapics; apic++) { @@ -1653,10 +1655,10 @@ static void __init enable_IO_APIC(void) /* See if any of the pins is in ExtINT mode */ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { struct IO_APIC_route_entry entry; - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); /* If the interrupt 
line is enabled and in ExtInt mode @@ -1730,12 +1732,12 @@ void disable_IO_APIC(void) /* * Add it to the IO-APIC irq-routing table: */ - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin, *(((int *)&entry)+1)); io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin, *(((int *)&entry)+0)); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); } disconnect_bsp_APIC(ioapic_i8259.pin != -1); } @@ -1775,9 +1777,9 @@ static void __init setup_ioapic_ids_from for (apic = 0; apic < nr_ioapics; apic++) { /* Read the register 0 value */ - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); old_id = mp_ioapics[apic].mpc_apicid; @@ -1836,16 +1838,16 @@ static void __init setup_ioapic_ids_from mp_ioapics[apic].mpc_apicid); reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); io_apic_write(apic, 0, reg_00.raw); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); /* * Sanity check */ - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) printk("could not set ID!\n"); else @@ -1912,14 +1914,15 @@ static unsigned int startup_edge_ioapic_ int was_pending = 0; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); if (irq < 16) { disable_8259A_irq(irq); if (i8259A_irq_pending(irq)) was_pending = 1; } __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); + ipipe_irq_unlock(irq); + 
spin_unlock_irqrestore_hw(&ioapic_lock, flags); return was_pending; } @@ -1929,6 +1932,25 @@ static unsigned int startup_edge_ioapic_ * interrupt for real. This prevents IRQ storms from unhandled * devices. */ + +#ifdef CONFIG_IPIPE + +static void ack_edge_ioapic_irq (unsigned irq) + +{ + if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) + == (IRQ_PENDING | IRQ_DISABLED)) { + unsigned long flags; + spin_lock_irqsave_hw(&ioapic_lock,flags); + __mask_IO_APIC_irq(irq); + spin_unlock_irqrestore_hw(&ioapic_lock,flags); + } + + __ack_APIC_irq(); +} + +#else /* !CONFIG_IPIPE */ + static void ack_edge_ioapic_irq(unsigned int irq) { move_irq(irq); @@ -1938,6 +1960,8 @@ static void ack_edge_ioapic_irq(unsigned ack_APIC_irq(); } +#endif /* CONFIG_IPIPE */ + /* * Level triggered interrupts can just be masked, * and shutting down and starting up the interrupt @@ -1959,6 +1983,83 @@ static unsigned int startup_level_ioapic return 0; /* don't check for pending */ } +#ifdef CONFIG_IPIPE + +/* Prevent low priority IRQs grabbed by high priority domains from + being delayed, waiting for a high priority interrupt handler + running in a low priority domain to complete. 
*/ + +static unsigned long bugous_edge_triggers; + +static void end_level_ioapic_irq (unsigned irq) + +{ + unsigned long flags; + + if (ipipe_root_domain_p) + move_irq(irq); + + spin_lock_irqsave_hw(&ioapic_lock, flags); + + if (test_and_clear_bit(irq,&bugous_edge_triggers)) { + atomic_inc(&irq_mis_count); + __unmask_and_level_IO_APIC_irq(irq); + } + else + __unmask_IO_APIC_irq(irq); + + ipipe_irq_unlock(irq); + + spin_unlock_irqrestore_hw(&ioapic_lock, flags); +} + +static void mask_and_ack_level_ioapic_irq (unsigned irq) + +{ + unsigned long flags, v; + int i; + + i = IO_APIC_VECTOR(irq); + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); + + spin_lock_irqsave_hw(&ioapic_lock, flags); + + if (!(v & (1 << (i & 0x1f)))) { + set_bit(irq,&bugous_edge_triggers); + __mask_and_edge_IO_APIC_irq(irq); + } + else + __mask_IO_APIC_irq(irq); + + spin_unlock_irqrestore_hw(&ioapic_lock, flags); + + __ack_APIC_irq(); +} + +#ifdef CONFIG_PCI_MSI + +static inline void mask_and_ack_level_ioapic_vector(unsigned int vector) + +{ + int irq = vector_to_irq(vector); + + mask_and_ack_level_ioapic_irq(irq); +} + +#else /* !CONFIG_PCI_MSI */ + +#ifdef CONFIG_IRQBALANCE +static void end_edge_ioapic_irq (unsigned irq) +{ + if (ipipe_root_domain_p) + move_irq(irq); +} +#endif /* CONFIG_IRQBALANCE */ + +#endif /* CONFIG_PCI_MSI */ + +#else /* !CONFIG_IPIPE */ + static void end_level_ioapic_irq (unsigned int irq) { unsigned long v; @@ -1999,6 +2100,8 @@ static void end_level_ioapic_irq (unsign } } +#endif /* CONFIG_IPIPE */ + #ifdef CONFIG_PCI_MSI static unsigned int startup_edge_ioapic_vector(unsigned int vector) { @@ -2011,7 +2114,9 @@ static void ack_edge_ioapic_vector(unsig { int irq = vector_to_irq(vector); +#ifndef CONFIG_IPIPE move_native_irq(vector); +#endif /* !CONFIG_IPIPE */ ack_edge_ioapic_irq(irq); } @@ -2026,7 +2131,8 @@ static void end_level_ioapic_vector (uns { int irq = vector_to_irq(vector); - move_native_irq(vector); + if (ipipe_root_domain_p) + move_native_irq(vector); 
end_level_ioapic_irq(irq); } @@ -2145,7 +2251,7 @@ static void disable_lapic_irq (unsigned static void ack_lapic_irq (unsigned int irq) { - ack_APIC_irq(); + __ack_APIC_irq(); } static void end_lapic_irq (unsigned int i) { /* nothing */ } @@ -2197,10 +2303,10 @@ static inline void unlock_ExtINT_logic(v if (pin == -1) return; - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin); *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); clear_IO_APIC_pin(apic, pin); memset(&entry1, 0, sizeof(entry1)); @@ -2213,10 +2319,10 @@ static inline void unlock_ExtINT_logic(v entry1.trigger = 0; entry1.vector = 0; - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); save_control = CMOS_READ(RTC_CONTROL); save_freq_select = CMOS_READ(RTC_FREQ_SELECT); @@ -2235,10 +2341,10 @@ static inline void unlock_ExtINT_logic(v CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); clear_IO_APIC_pin(apic, pin); - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); } /* @@ -2438,12 +2544,12 @@ static int ioapic_suspend(struct sys_dev data = container_of(dev, struct sysfs_ioapic_data, dev); entry = data->entry; - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 
+ 2 * i); *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); } - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); return 0; } @@ -2459,7 +2565,7 @@ static int ioapic_resume(struct sys_devi data = container_of(dev, struct sysfs_ioapic_data, dev); entry = data->entry; - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) { reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; @@ -2469,7 +2575,7 @@ static int ioapic_resume(struct sys_devi io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); } - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); return 0; } @@ -2541,9 +2647,9 @@ int __init io_apic_get_unique_id (int io if (physids_empty(apic_id_map)) apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); if (apic_id >= get_physical_broadcast()) { printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " @@ -2577,10 +2683,10 @@ int __init io_apic_get_unique_id (int io if (reg_00.bits.ID != apic_id) { reg_00.bits.ID = apic_id; - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); io_apic_write(ioapic, 0, reg_00.raw); reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); /* Sanity check */ if (reg_00.bits.ID != apic_id) { @@ -2601,9 +2707,9 @@ int __init io_apic_get_version (int ioap union IO_APIC_reg_01 reg_01; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); reg_01.raw = io_apic_read(ioapic, 1); - 
spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); return reg_01.bits.version; } @@ -2614,9 +2720,9 @@ int __init io_apic_get_redir_entries (in union IO_APIC_reg_01 reg_01; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); return reg_01.bits.entries; } @@ -2666,11 +2772,11 @@ int io_apic_set_pci_routing (int ioapic, if (!ioapic && (irq < 16)) disable_8259A_irq(irq); - spin_lock_irqsave(&ioapic_lock, flags); + spin_lock_irqsave_hw(&ioapic_lock, flags); io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); - spin_unlock_irqrestore(&ioapic_lock, flags); + spin_unlock_irqrestore_hw(&ioapic_lock, flags); return 0; } Index: linux-2.6.16.33/arch/i386/kernel/ipipe-core.c =================================================================== --- /dev/null +++ linux-2.6.16.33/arch/i386/kernel/ipipe-core.c @@ -0,0 +1,306 @@ +/* -*- linux-c -*- + * linux/arch/i386/kernel/ipipe-core.c + * + * Copyright (C) 2002-2005 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Architecture-dependent I-PIPE core support for x86. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_X86_LOCAL_APIC +#include +#include +#include +#ifdef CONFIG_X86_IO_APIC +#include +#endif /* CONFIG_X86_IO_APIC */ +#include +#include +#endif /* CONFIG_X86_LOCAL_APIC */ + +struct pt_regs __ipipe_tick_regs[IPIPE_NR_CPUS]; + +int __ipipe_tick_irq; + +#ifdef CONFIG_SMP + +static cpumask_t __ipipe_cpu_sync_map; + +static cpumask_t __ipipe_cpu_lock_map; + +static ipipe_spinlock_t __ipipe_cpu_barrier = IPIPE_SPIN_LOCK_UNLOCKED; + +static atomic_t __ipipe_critical_count = ATOMIC_INIT(0); + +static void (*__ipipe_cpu_sync) (void); + +#endif /* CONFIG_SMP */ + +int __ipipe_ack_system_irq(unsigned irq) +{ +#ifdef CONFIG_X86_LOCAL_APIC + __ack_APIC_irq(); +#endif /* CONFIG_X86_LOCAL_APIC */ + return 1; +} + +#ifdef CONFIG_SMP + +/* Always called with hw interrupts off. */ + +void __ipipe_do_critical_sync(unsigned irq, void *cookie) +{ + ipipe_declare_cpuid; + + ipipe_load_cpuid(); + + cpu_set(cpuid, __ipipe_cpu_sync_map); + + /* Now we are in sync with the lock requestor running on another + CPU. Enter a spinning wait until he releases the global + lock. */ + spin_lock_hw(&__ipipe_cpu_barrier); + + /* Got it. Now get out. */ + + if (__ipipe_cpu_sync) + /* Call the sync routine if any. */ + __ipipe_cpu_sync(); + + spin_unlock_hw(&__ipipe_cpu_barrier); + + cpu_clear(cpuid, __ipipe_cpu_sync_map); +} + +#endif /* CONFIG_SMP */ + +/* ipipe_critical_enter() -- Grab the superlock excluding all CPUs + but the current one from a critical section. 
This lock is used when + we must enforce a global critical section for a single CPU in a + possibly SMP system whichever context the CPUs are running. */ + +unsigned long ipipe_critical_enter(void (*syncfn) (void)) +{ + unsigned long flags; + + local_irq_save_hw(flags); + +#ifdef CONFIG_SMP + if (num_online_cpus() > 1) { /* We might be running a SMP-kernel on a UP box... */ + ipipe_declare_cpuid; + cpumask_t lock_map; + + ipipe_load_cpuid(); + + if (!cpu_test_and_set(cpuid, __ipipe_cpu_lock_map)) { + while (cpu_test_and_set + (BITS_PER_LONG - 1, __ipipe_cpu_lock_map)) { + int n = 0; + do { + cpu_relax(); + } while (++n < cpuid); + } + + spin_lock_hw(&__ipipe_cpu_barrier); + + __ipipe_cpu_sync = syncfn; + + /* Send the sync IPI to all processors but the current one. */ + send_IPI_allbutself(IPIPE_CRITICAL_VECTOR); + + cpus_andnot(lock_map, cpu_online_map, + __ipipe_cpu_lock_map); + + while (!cpus_equal(__ipipe_cpu_sync_map, lock_map)) + cpu_relax(); + } + + atomic_inc(&__ipipe_critical_count); + } +#endif /* CONFIG_SMP */ + + return flags; +} + +/* ipipe_critical_exit() -- Release the superlock. */ + +void ipipe_critical_exit(unsigned long flags) +{ +#ifdef CONFIG_SMP + if (num_online_cpus() > 1) { /* We might be running a SMP-kernel on a UP box... */ + ipipe_declare_cpuid; + + ipipe_load_cpuid(); + + if (atomic_dec_and_test(&__ipipe_critical_count)) { + spin_unlock_hw(&__ipipe_cpu_barrier); + + while (!cpus_empty(__ipipe_cpu_sync_map)) + cpu_relax(); + + cpu_clear(cpuid, __ipipe_cpu_lock_map); + cpu_clear(BITS_PER_LONG - 1, __ipipe_cpu_lock_map); + } + } +#endif /* CONFIG_SMP */ + + local_irq_restore_hw(flags); +} + +/* ipipe_trigger_irq() -- Push the interrupt at front of the pipeline + just like if it has been actually received from a hw source. Also + works for virtual interrupts. 
*/ + +int fastcall ipipe_trigger_irq(unsigned irq) +{ + struct pt_regs regs; + unsigned long flags; + + if (irq >= IPIPE_NR_IRQS || + (ipipe_virtual_irq_p(irq) && + !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))) + return -EINVAL; + + local_irq_save_hw(flags); + + regs.orig_eax = irq; /* Won't be acked */ + regs.xcs = __KERNEL_CS; + regs.eflags = flags; + + __ipipe_handle_irq(regs); + + local_irq_restore_hw(flags); + + return 1; +} + +#ifdef CONFIG_SMP + +cpumask_t __ipipe_set_irq_affinity (unsigned irq, cpumask_t cpumask) + +{ + cpumask_t oldmask = irq_affinity[irq]; + + if (irq_desc[irq].handler->set_affinity == NULL) + return CPU_MASK_NONE; + + if (cpus_empty(cpumask)) + return oldmask; /* Return mask value -- no change. */ + + cpus_and(cpumask,cpumask,cpu_online_map); + + if (cpus_empty(cpumask)) + return CPU_MASK_NONE; /* Error -- bad mask value or non-routable IRQ. */ + + irq_affinity[irq] = cpumask; + irq_desc[irq].handler->set_affinity(irq,cpumask); + return oldmask; +} + +int fastcall __ipipe_send_ipi (unsigned ipi, cpumask_t cpumask) + +{ + unsigned long flags; + ipipe_declare_cpuid; + int self; + + if (ipi != IPIPE_SERVICE_IPI0 && + ipi != IPIPE_SERVICE_IPI1 && + ipi != IPIPE_SERVICE_IPI2 && + ipi != IPIPE_SERVICE_IPI3) + return -EINVAL; + + ipipe_lock_cpu(flags); + + self = cpu_isset(cpuid,cpumask); + cpu_clear(cpuid,cpumask); + + if (!cpus_empty(cpumask)) + send_IPI_mask(cpumask,ipi + FIRST_EXTERNAL_VECTOR); + + if (self) + ipipe_trigger_irq(ipi); + + ipipe_unlock_cpu(flags); + + return 0; +} + +#endif /* CONFIG_SMP */ + +int ipipe_get_sysinfo(struct ipipe_sysinfo *info) +{ + info->ncpus = num_online_cpus(); + info->cpufreq = ipipe_cpu_freq(); + info->archdep.tmirq = __ipipe_tick_irq; +#ifdef CONFIG_X86_TSC + info->archdep.tmfreq = ipipe_cpu_freq(); +#else /* !CONFIG_X86_TSC */ + info->archdep.tmfreq = CLOCK_TICK_RATE; +#endif /* CONFIG_X86_TSC */ + + return 0; +} + +int ipipe_tune_timer (unsigned long ns, int flags) + +{ + unsigned hz, 
latch; + unsigned long x; + + if (flags & IPIPE_RESET_TIMER) + latch = LATCH; + else { + hz = 1000000000 / ns; + + if (hz < HZ) + return -EINVAL; + + latch = (CLOCK_TICK_RATE + hz/2) / hz; + } + + x = ipipe_critical_enter(NULL); /* Sync with all CPUs */ + + /* Shamelessly lifted from init_IRQ() in i8259.c */ + outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */ + outb_p(latch & 0xff,0x40); /* LSB */ + outb(latch >> 8,0x40); /* MSB */ + + ipipe_critical_exit(x); + + return 0; +} + +EXPORT_SYMBOL(__ipipe_tick_irq); +EXPORT_SYMBOL(ipipe_critical_enter); +EXPORT_SYMBOL(ipipe_critical_exit); +EXPORT_SYMBOL(ipipe_trigger_irq); +EXPORT_SYMBOL(ipipe_get_sysinfo); +EXPORT_SYMBOL(ipipe_tune_timer); Index: linux-2.6.16.33/arch/i386/kernel/ipipe-mcount.S =================================================================== --- /dev/null +++ linux-2.6.16.33/arch/i386/kernel/ipipe-mcount.S @@ -0,0 +1,45 @@ +/* + * linux/arch/i386/ipipe-mcount.S + * + * Copyright (C) 2005 Jan Kiszka + */ + +#include + +.globl mcount +mcount: + cmpl $0,ipipe_trace_enable + je out + + pushl %ebp + movl %esp,%ebp + + pushl %eax + pushl %ecx + pushl %edx + + pushl $0 # no additional value (v) +#ifdef CONFIG_REGPARM + movl (%ebp),%eax + movl 0x4(%ebp),%edx # __CALLER_ADDR0 + movl 0x4(%eax),%ecx # __CALLER_ADDR1 + movl $0,%eax # IPIPE_TRACE_FN + call __ipipe_trace + popl %eax +#else /* !CONFIG_REGPARM */ + movl (%ebp),%eax + movl 0x4(%eax),%eax + pushl %eax # __CALLER_ADDR1 + movl 0x4(%ebp),%eax + pushl %eax # __CALLER_ADDR0 + pushl $0 # IPIPE_TRACE_FN + call __ipipe_trace + addl $0x10,%esp +#endif /* CONFIG_REGPARM */ + + popl %edx + popl %ecx + popl %eax + popl %ebp +out: + ret Index: linux-2.6.16.33/arch/i386/kernel/ipipe-root.c =================================================================== --- /dev/null +++ linux-2.6.16.33/arch/i386/kernel/ipipe-root.c @@ -0,0 +1,648 @@ +/* -*- linux-c -*- + * linux/arch/i386/kernel/ipipe-root.c + * + * Copyright (C) 2002-2005 Philippe Gerum. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Architecture-dependent I-PIPE support for x86. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_X86_LOCAL_APIC +#include +#include +#include +#include +#ifdef CONFIG_X86_IO_APIC +#include +#endif /* CONFIG_X86_IO_APIC */ +#include +#include + +static int __ipipe_noack_irq(unsigned irq) +{ + return 1; +} +#endif /* CONFIG_X86_LOCAL_APIC */ + +fastcall unsigned int do_IRQ(struct pt_regs *regs); +fastcall void smp_apic_timer_interrupt(struct pt_regs *regs); +fastcall void smp_spurious_interrupt(struct pt_regs *regs); +fastcall void smp_error_interrupt(struct pt_regs *regs); +fastcall void smp_thermal_interrupt(struct pt_regs *regs); +fastcall void smp_reschedule_interrupt(struct pt_regs *regs); +fastcall void smp_invalidate_interrupt(struct pt_regs *regs); +fastcall void smp_call_function_interrupt(struct pt_regs *regs); + +static int __ipipe_ack_common_irq(unsigned irq) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + ipipe_declare_cpuid; + + ipipe_load_cpuid(); /* hw interrupts are off. 
*/ + flags = ipipe_test_and_stall_pipeline(); + preempt_disable(); + desc->handler->ack(irq); + preempt_enable_no_resched(); + ipipe_restore_pipeline_nosync(per_cpu(ipipe_percpu_domain, cpuid), flags, cpuid); + + return 1; +} + +void __ipipe_enable_irqdesc(unsigned irq) +{ + irq_desc[irq].status &= ~IRQ_DISABLED; +} + +#ifdef CONFIG_X86_LOCAL_APIC + +static void __ipipe_null_handler(unsigned irq, void *cookie) +{ + /* Nop. */ +} + +#ifdef CONFIG_SMP + +static notrace int __ipipe_boot_cpuid(void) +{ + return 0; +} + +u8 __ipipe_apicid_2_cpu[IPIPE_NR_CPUS]; + +static notrace int __ipipe_hard_cpuid(void) +{ + unsigned long flags; + int cpu; + + local_irq_save_hw_notrace(flags); + cpu = __ipipe_apicid_2_cpu[GET_APIC_ID(apic_read(APIC_ID))]; + local_irq_restore_hw_notrace(flags); + return cpu; +} + +int (*__ipipe_logical_cpuid)(void) = &__ipipe_boot_cpuid; + +EXPORT_SYMBOL(__ipipe_logical_cpuid); + +#endif /* CONFIG_SMP */ + +#endif /* CONFIG_X86_LOCAL_APIC */ + +/* __ipipe_enable_pipeline() -- We are running on the boot CPU, hw + interrupts are off, and secondary CPUs are still lost in space. */ + +void __init __ipipe_enable_pipeline(void) +{ + unsigned irq; + +#ifdef CONFIG_X86_LOCAL_APIC + + /* Map the APIC system vectors. 
*/ + + ipipe_virtualize_irq(ipipe_root_domain, + LOCAL_TIMER_VECTOR - FIRST_EXTERNAL_VECTOR, + (ipipe_irq_handler_t)&smp_apic_timer_interrupt, + NULL, + &__ipipe_ack_system_irq, + IPIPE_STDROOT_MASK); + + ipipe_virtualize_irq(ipipe_root_domain, + SPURIOUS_APIC_VECTOR - FIRST_EXTERNAL_VECTOR, + (ipipe_irq_handler_t)&smp_spurious_interrupt, + NULL, + &__ipipe_noack_irq, + IPIPE_STDROOT_MASK); + + ipipe_virtualize_irq(ipipe_root_domain, + ERROR_APIC_VECTOR - FIRST_EXTERNAL_VECTOR, + (ipipe_irq_handler_t)&smp_error_interrupt, + NULL, + &__ipipe_ack_system_irq, + IPIPE_STDROOT_MASK); + + ipipe_virtualize_irq(ipipe_root_domain, + IPIPE_SERVICE_VECTOR0 - FIRST_EXTERNAL_VECTOR, + &__ipipe_null_handler, + NULL, + &__ipipe_ack_system_irq, + IPIPE_STDROOT_MASK); + + ipipe_virtualize_irq(ipipe_root_domain, + IPIPE_SERVICE_VECTOR1 - FIRST_EXTERNAL_VECTOR, + &__ipipe_null_handler, + NULL, + &__ipipe_ack_system_irq, + IPIPE_STDROOT_MASK); + + ipipe_virtualize_irq(ipipe_root_domain, + IPIPE_SERVICE_VECTOR2 - FIRST_EXTERNAL_VECTOR, + &__ipipe_null_handler, + NULL, + &__ipipe_ack_system_irq, + IPIPE_STDROOT_MASK); + + ipipe_virtualize_irq(ipipe_root_domain, + IPIPE_SERVICE_VECTOR3 - FIRST_EXTERNAL_VECTOR, + &__ipipe_null_handler, + NULL, + &__ipipe_ack_system_irq, + IPIPE_STDROOT_MASK); + +#ifdef CONFIG_X86_MCE_P4THERMAL + ipipe_virtualize_irq(ipipe_root_domain, + THERMAL_APIC_VECTOR - FIRST_EXTERNAL_VECTOR, + (ipipe_irq_handler_t)&smp_thermal_interrupt, + NULL, + &__ipipe_ack_system_irq, + IPIPE_STDROOT_MASK); +#endif /* CONFIG_X86_MCE_P4THERMAL */ + + __ipipe_tick_irq = + using_apic_timer ? 
LOCAL_TIMER_VECTOR - FIRST_EXTERNAL_VECTOR : 0; + +#else /* !CONFIG_X86_LOCAL_APIC */ + + __ipipe_tick_irq = 0; + +#endif /* CONFIG_X86_LOCAL_APIC */ + +#ifdef CONFIG_SMP + + ipipe_virtualize_irq(ipipe_root_domain, + RESCHEDULE_VECTOR - FIRST_EXTERNAL_VECTOR, + (ipipe_irq_handler_t)&smp_reschedule_interrupt, + NULL, + &__ipipe_ack_system_irq, + IPIPE_STDROOT_MASK); + + ipipe_virtualize_irq(ipipe_root_domain, + INVALIDATE_TLB_VECTOR - FIRST_EXTERNAL_VECTOR, + (ipipe_irq_handler_t)&smp_invalidate_interrupt, + NULL, + &__ipipe_ack_system_irq, + IPIPE_STDROOT_MASK); + + ipipe_virtualize_irq(ipipe_root_domain, + CALL_FUNCTION_VECTOR - FIRST_EXTERNAL_VECTOR, + (ipipe_irq_handler_t)&smp_call_function_interrupt, + NULL, + &__ipipe_ack_system_irq, + IPIPE_STDROOT_MASK); + + /* Some guest O/S may run tasks over non-Linux stacks, so we + * cannot rely on the regular definition of smp_processor_id() + * on x86 to fetch the logical cpu id. We fix this by using + * our own private physical apicid -> logical cpuid mapping + * as soon as the pipeline is enabled, so that + * ipipe_processor_id() always does the right thing, regardless + * of the current stack setup. Also note that the pipeline is + * enabled after the APIC space has been mapped in + * trap_init(), so it's safe to use it. */ + + __ipipe_logical_cpuid = &__ipipe_hard_cpuid; + +#endif /* CONFIG_SMP */ + + /* Finally, virtualize the remaining ISA and IO-APIC + * interrupts. Interrupts which have already been virtualized + * will just beget a silent -EPERM error since + * IPIPE_SYSTEM_MASK has been passed for them, that's ok. */ + + for (irq = 0; irq < NR_IRQS; irq++) { + /* Fails for IPIPE_CRITICAL_IPI but that's ok. */ + ipipe_virtualize_irq(ipipe_root_domain, + irq, + (ipipe_irq_handler_t)&do_IRQ, + NULL, + &__ipipe_ack_common_irq, + IPIPE_STDROOT_MASK); + } + +#ifdef CONFIG_X86_LOCAL_APIC + /* Eventually allow these vectors to be reprogrammed. 
*/ + ipipe_root_domain->irqs[IPIPE_SERVICE_IPI0].control &= ~IPIPE_SYSTEM_MASK; + ipipe_root_domain->irqs[IPIPE_SERVICE_IPI1].control &= ~IPIPE_SYSTEM_MASK; + ipipe_root_domain->irqs[IPIPE_SERVICE_IPI2].control &= ~IPIPE_SYSTEM_MASK; + ipipe_root_domain->irqs[IPIPE_SERVICE_IPI3].control &= ~IPIPE_SYSTEM_MASK; +#endif /* CONFIG_X86_LOCAL_APIC */ +} + +static inline void __fixup_if(struct pt_regs *regs) +{ + ipipe_declare_cpuid; + unsigned long flags; + + ipipe_get_cpu(flags); + + if (per_cpu(ipipe_percpu_domain, cpuid) == ipipe_root_domain) { + /* Have the saved hw state look like the domain stall bit, so + that __ipipe_unstall_iret_root() restores the proper + pipeline state for the root stage upon exit. */ + + if (test_bit + (IPIPE_STALL_FLAG, + &ipipe_root_domain->cpudata[cpuid].status)) + regs->eflags &= ~X86_EFLAGS_IF; + else + regs->eflags |= X86_EFLAGS_IF; + } + + ipipe_put_cpu(flags); +} + +/* Check the stall bit of the root domain to make sure the existing + preemption opportunity upon in-kernel resumption could be + exploited. In case a rescheduling could take place, the root stage + is stalled before the hw interrupts are re-enabled. This routine + must be called with hw interrupts off. */ + +asmlinkage int __ipipe_kpreempt_root(struct pt_regs regs) +{ + ipipe_declare_cpuid; + unsigned long flags; + + ipipe_get_cpu(flags); + + if (test_bit + (IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status)) { + ipipe_put_cpu(flags); + return 0; /* Root stage is stalled: rescheduling denied. */ + } + + __ipipe_stall_root(); + local_irq_enable_hw_notrace(); + + return 1; /* Ok, may reschedule now. */ +} + +asmlinkage void __ipipe_unstall_iret_root(struct pt_regs regs) +{ + ipipe_declare_cpuid; + + /* Emulate IRET's handling of the interrupt flag. */ + + local_irq_disable_hw(); + + ipipe_load_cpuid(); + + /* Restore the software state as it used to be on kernel + entry. CAUTION: NMIs must *not* return through this + emulation. 
*/ + + if (!(regs.eflags & X86_EFLAGS_IF)) { + __set_bit(IPIPE_STALL_FLAG, + &ipipe_root_domain->cpudata[cpuid].status); + regs.eflags |= X86_EFLAGS_IF; + } else { + __clear_bit(IPIPE_STALL_FLAG, + &ipipe_root_domain->cpudata[cpuid].status); + + /* Only sync virtual IRQs here, so that we don't recurse + indefinitely in case of an external interrupt flood. */ + + if ((ipipe_root_domain->cpudata[cpuid]. + irq_pending_hi & IPIPE_IRQMASK_VIRT) != 0) + __ipipe_sync_pipeline(IPIPE_IRQMASK_VIRT); + } +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + ipipe_trace_end(0x8000000D); +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ +} + +asmlinkage int __ipipe_syscall_root(struct pt_regs regs) +{ + ipipe_declare_cpuid; + unsigned long flags; + + __fixup_if(&regs); + + /* This routine either returns: + 0 -- if the syscall is to be passed to Linux; + >0 -- if the syscall should not be passed to Linux, and no + tail work should be performed; + <0 -- if the syscall should not be passed to Linux but the + tail work has to be performed (for handling signals etc). */ + + if (__ipipe_syscall_watched_p(current, regs.orig_eax) && + __ipipe_event_monitored_p(IPIPE_EVENT_SYSCALL) && + __ipipe_dispatch_event(IPIPE_EVENT_SYSCALL,&regs) > 0) { + /* We might enter here over a non-root domain and exit + * over the root one as a result of the syscall + * (i.e. by recycling the register set of the current + * context across the migration), so we need to fixup + * the interrupt flag upon return too, so that + * __ipipe_unstall_iret_root() resets the correct + * stall bit on exit. */ + __fixup_if(&regs); + + if (ipipe_current_domain == ipipe_root_domain && !in_atomic()) { + /* Sync pending VIRQs before _TIF_NEED_RESCHED + * is tested. 
*/ + ipipe_lock_cpu(flags); + if ((ipipe_root_domain->cpudata[cpuid].irq_pending_hi & IPIPE_IRQMASK_VIRT) != 0) + __ipipe_sync_pipeline(IPIPE_IRQMASK_VIRT); + ipipe_unlock_cpu(flags); + return -1; + } + return 1; + } + + return 0; +} + +static fastcall void do_machine_check_vector(struct pt_regs *regs, long error_code) +{ +#ifdef CONFIG_X86_MCE + extern fastcall void (*machine_check_vector)(struct pt_regs *, long); + machine_check_vector(regs,error_code); +#endif /* CONFIG_X86_MCE */ +} + +fastcall void do_divide_error(struct pt_regs *regs, long error_code); +fastcall void do_overflow(struct pt_regs *regs, long error_code); +fastcall void do_bounds(struct pt_regs *regs, long error_code); +fastcall void do_invalid_op(struct pt_regs *regs, long error_code); +fastcall void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code); +fastcall void do_invalid_TSS(struct pt_regs *regs, long error_code); +fastcall void do_segment_not_present(struct pt_regs *regs, long error_code); +fastcall void do_stack_segment(struct pt_regs *regs, long error_code); +fastcall void do_general_protection(struct pt_regs *regs, long error_code); +fastcall void do_page_fault(struct pt_regs *regs, long error_code); +fastcall void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code); +fastcall void do_coprocessor_error(struct pt_regs *regs, long error_code); +fastcall void do_alignment_check(struct pt_regs *regs, long error_code); +fastcall void do_simd_coprocessor_error(struct pt_regs *regs, long error_code); +fastcall void do_iret_error(struct pt_regs *regs, long error_code); + +/* Work around genksyms's issue with over-qualification in decls. 
*/ + +typedef fastcall void __ipipe_exhandler(struct pt_regs *, long); + +typedef __ipipe_exhandler *__ipipe_exptr; + +static __ipipe_exptr __ipipe_std_extable[] = { + + [ex_do_divide_error] = &do_divide_error, + [ex_do_overflow] = &do_overflow, + [ex_do_bounds] = &do_bounds, + [ex_do_invalid_op] = &do_invalid_op, + [ex_do_coprocessor_segment_overrun] = &do_coprocessor_segment_overrun, + [ex_do_invalid_TSS] = &do_invalid_TSS, + [ex_do_segment_not_present] = &do_segment_not_present, + [ex_do_stack_segment] = &do_stack_segment, + [ex_do_general_protection] = do_general_protection, + [ex_do_page_fault] = &do_page_fault, + [ex_do_spurious_interrupt_bug] = &do_spurious_interrupt_bug, + [ex_do_coprocessor_error] = &do_coprocessor_error, + [ex_do_alignment_check] = &do_alignment_check, + [ex_machine_check_vector] = &do_machine_check_vector, + [ex_do_simd_coprocessor_error] = &do_simd_coprocessor_error, + [ex_do_iret_error] = &do_iret_error, +}; + +#ifdef CONFIG_KGDB +#include + +static int __ipipe_xlate_signo[] = { + + [ex_do_divide_error] = SIGFPE, + [ex_do_debug] = SIGTRAP, + [2] = -1, + [ex_do_int3] = SIGTRAP, + [ex_do_overflow] = SIGSEGV, + [ex_do_bounds] = SIGSEGV, + [ex_do_invalid_op] = SIGILL, + [ex_device_not_available] = -1, + [8] = -1, + [ex_do_coprocessor_segment_overrun] = SIGFPE, + [ex_do_invalid_TSS] = SIGSEGV, + [ex_do_segment_not_present] = SIGBUS, + [ex_do_stack_segment] = SIGBUS, + [ex_do_general_protection] = SIGSEGV, + [ex_do_page_fault] = SIGSEGV, + [ex_do_spurious_interrupt_bug] = -1, + [ex_do_coprocessor_error] = -1, + [ex_do_alignment_check] = SIGBUS, + [ex_machine_check_vector] = -1, + [ex_do_simd_coprocessor_error] = -1, + [20 ... 
31] = -1, + [ex_do_iret_error] = SIGSEGV, +}; +#endif /* CONFIG_KGDB */ + +fastcall int __ipipe_handle_exception(struct pt_regs *regs, long error_code, int vector) +{ +#ifdef CONFIG_KGDB + /* catch exception KGDB is interested in over non-root domains */ + if (ipipe_current_domain != ipipe_root_domain && + __ipipe_xlate_signo[vector] >= 0 && + !kgdb_handle_exception(vector, __ipipe_xlate_signo[vector], error_code, regs)) + return 1; +#endif /* CONFIG_KGDB */ + + if (!ipipe_trap_notify(vector, regs)) { + __ipipe_exptr handler = __ipipe_std_extable[vector]; + handler(regs,error_code); + __fixup_if(regs); + return 0; + } + + return 1; +} + +fastcall int __ipipe_divert_exception(struct pt_regs *regs, int vector) +{ +#ifdef CONFIG_KGDB + /* catch int1 and int3 over non-root domains */ + if ((ipipe_current_domain != ipipe_root_domain) && + (vector != ex_device_not_available)) { + unsigned int condition = 0; + + if (vector == 1) + get_debugreg(condition, 6); + if (!kgdb_handle_exception(vector, SIGTRAP, condition, regs)) + return 1; + } +#endif /* CONFIG_KGDB */ + + if (ipipe_trap_notify(vector, regs)) + return 1; + + __fixup_if(regs); + + return 0; +} + +/* __ipipe_handle_irq() -- IPIPE's generic IRQ handler. An optimistic + interrupt protection log is maintained here for each domain. Hw + interrupts are off on entry. 
*/ + +int __ipipe_handle_irq(struct pt_regs regs) +{ + struct ipipe_domain *this_domain, *next_domain; + unsigned irq = regs.orig_eax; + struct list_head *head, *pos; + ipipe_declare_cpuid; + int m_ack, s_ack; + + ipipe_load_cpuid(); + + if (regs.orig_eax < 0) { + irq &= 0xff; + m_ack = 0; + } else + m_ack = 1; + + this_domain = per_cpu(ipipe_percpu_domain, cpuid); + + if (test_bit(IPIPE_STICKY_FLAG, &this_domain->irqs[irq].control)) + head = &this_domain->p_link; + else { + head = __ipipe_pipeline.next; + next_domain = list_entry(head, struct ipipe_domain, p_link); + if (likely(test_bit(IPIPE_WIRED_FLAG, &next_domain->irqs[irq].control))) { + if (!m_ack && next_domain->irqs[irq].acknowledge != NULL) + next_domain->irqs[irq].acknowledge(irq); + if (likely(__ipipe_dispatch_wired(next_domain, irq))) + goto finalize; + else + goto finalize_nosync; + } + } + + /* Ack the interrupt. */ + + s_ack = m_ack; + pos = head; + + while (pos != &__ipipe_pipeline) { + next_domain = list_entry(pos, struct ipipe_domain, p_link); + + /* For each domain handling the incoming IRQ, mark it as + pending in its log. */ + + if (test_bit + (IPIPE_HANDLE_FLAG, &next_domain->irqs[irq].control)) { + /* Domains that handle this IRQ are polled for + acknowledging it by decreasing priority order. The + interrupt must be made pending _first_ in the domain's + status flags before the PIC is unlocked. */ + + next_domain->cpudata[cpuid].irq_counters[irq].total_hits++; + next_domain->cpudata[cpuid].irq_counters[irq].pending_hits++; + __ipipe_set_irq_bit(next_domain, cpuid, irq); + + /* Always get the first master acknowledge available. Once + we've got it, allow slave acknowledge handlers to run + (until one of them stops us). 
*/ + + if (!m_ack) + m_ack = next_domain->irqs[irq].acknowledge(irq); + else if (test_bit + (IPIPE_SHARED_FLAG, + &next_domain->irqs[irq].control) && !s_ack) + s_ack = next_domain->irqs[irq].acknowledge(irq); + } + + /* If the domain does not want the IRQ to be passed down the + interrupt pipe, exit the loop now. */ + + if (!test_bit(IPIPE_PASS_FLAG, &next_domain->irqs[irq].control)) + break; + + pos = next_domain->p_link.next; + } + + if (irq == __ipipe_tick_irq && + __ipipe_pipeline_head_p(ipipe_root_domain) && + ipipe_root_domain->cpudata[cpuid].irq_counters[irq].pending_hits > 1) + /* + * Emulate a loss of clock ticks if Linux is owning + * the time source. The drift will be compensated by + * the timer support code. + */ + ipipe_root_domain->cpudata[cpuid].irq_counters[irq].pending_hits = 1; + +finalize: + + if (irq == __ipipe_tick_irq) { + __ipipe_tick_regs[cpuid].eflags = regs.eflags; + __ipipe_tick_regs[cpuid].eip = regs.eip; + __ipipe_tick_regs[cpuid].xcs = regs.xcs; +#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) + /* Linux profiling code needs this. */ + __ipipe_tick_regs[cpuid].ebp = regs.ebp; +#endif /* CONFIG_SMP && CONFIG_FRAME_POINTER */ + } + + /* Now walk the pipeline, yielding control to the highest + priority domain that has pending interrupt(s) or + immediately to the current domain if the interrupt has been + marked as 'sticky'. This search does not go beyond the + current domain in the pipeline. */ + + __ipipe_walk_pipeline(head, cpuid); + +finalize_nosync: + + ipipe_load_cpuid(); + + if (per_cpu(ipipe_percpu_domain, cpuid) != ipipe_root_domain || + test_bit(IPIPE_STALL_FLAG, + &ipipe_root_domain->cpudata[cpuid].status)) + return 0; + +#ifdef CONFIG_SMP + /* Prevent a spurious rescheduling from being triggered on + preemptible kernels along the way out through + ret_from_intr. 
*/ + if (regs.orig_eax < 0) + __set_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status); +#endif /* CONFIG_SMP */ + + return 1; +} + +EXPORT_SYMBOL_GPL(irq_desc); +EXPORT_SYMBOL_GPL(default_ldt); +EXPORT_SYMBOL_GPL(__switch_to); +EXPORT_SYMBOL_GPL(show_stack); +EXPORT_PER_CPU_SYMBOL_GPL(init_tss); +#ifdef CONFIG_SMP +EXPORT_PER_CPU_SYMBOL_GPL(cpu_tlbstate); +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_IPIPE_TRACE_MCOUNT +void notrace mcount(void); +EXPORT_SYMBOL(mcount); +#endif /* CONFIG_IPIPE_TRACE_MCOUNT */ Index: linux-2.6.16.33/arch/i386/kernel/nmi.c =================================================================== --- linux-2.6.16.33.orig/arch/i386/kernel/nmi.c +++ linux-2.6.16.33/arch/i386/kernel/nmi.c @@ -38,6 +38,7 @@ static unsigned int nmi_hz = HZ; static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ static unsigned int nmi_p4_cccr_val; extern void show_registers(struct pt_regs *regs); +static void default_nmi_watchdog_tick (struct pt_regs * regs); /* * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: @@ -184,6 +185,7 @@ static int __init setup_nmi_watchdog(cha if (nmi >= NMI_INVALID) return 0; + nmi_watchdog_tick = default_nmi_watchdog_tick; if (nmi == NMI_NONE) nmi_watchdog = nmi; /* @@ -519,9 +521,7 @@ void touch_nmi_watchdog (void) touch_softlockup_watchdog(); } -extern void die_nmi(struct pt_regs *, const char *msg); - -void nmi_watchdog_tick (struct pt_regs * regs) +static void default_nmi_watchdog_tick (struct pt_regs * regs) { /* @@ -529,7 +529,12 @@ void nmi_watchdog_tick (struct pt_regs * * always switch the stack NMI-atomically, it's safe to use * smp_processor_id(). 
*/ - int sum, cpu = smp_processor_id(); + int sum; +#ifdef CONFIG_IPIPE + int cpu = ipipe_processor_id(); +#else /* !CONFIG_IPIPE */ + int cpu = smp_processor_id(); +#endif /* !CONFIG_IPIPE */ sum = per_cpu(irq_stat, cpu).apic_timer_irqs; @@ -619,3 +624,4 @@ EXPORT_SYMBOL(reserve_lapic_nmi); EXPORT_SYMBOL(release_lapic_nmi); EXPORT_SYMBOL(disable_timer_nmi_watchdog); EXPORT_SYMBOL(enable_timer_nmi_watchdog); +EXPORT_SYMBOL(touch_nmi_watchdog); Index: linux-2.6.16.33/arch/i386/kernel/process.c =================================================================== --- linux-2.6.16.33.orig/arch/i386/kernel/process.c +++ linux-2.6.16.33/arch/i386/kernel/process.c @@ -195,6 +195,7 @@ void cpu_idle(void) play_dead(); __get_cpu_var(irq_stat).idle_timestamp = jiffies; + ipipe_suspend_domain(); idle(); } preempt_enable_no_resched(); Index: linux-2.6.16.33/arch/i386/kernel/smp.c =================================================================== --- linux-2.6.16.33.orig/arch/i386/kernel/smp.c +++ linux-2.6.16.33/arch/i386/kernel/smp.c @@ -132,6 +132,9 @@ void __send_IPI_shortcut(unsigned int sh * to the APIC. */ unsigned int cfg; + unsigned long flags; + + local_irq_save_hw_cond(flags); /* * Wait for idle. @@ -147,6 +150,8 @@ void __send_IPI_shortcut(unsigned int sh * Send the IPI. The write to APIC_ICR fires this off. */ apic_write_around(APIC_ICR, cfg); + + local_irq_restore_hw_cond(flags); } void fastcall send_IPI_self(int vector) @@ -163,7 +168,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu unsigned long cfg; unsigned long flags; - local_irq_save(flags); + local_irq_save_hw(flags); WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); /* * Wait for idle. 
@@ -186,7 +191,7 @@ void send_IPI_mask_bitmask(cpumask_t cpu */ apic_write_around(APIC_ICR, cfg); - local_irq_restore(flags); + local_irq_restore_hw(flags); } void send_IPI_mask_sequence(cpumask_t mask, int vector) @@ -200,7 +205,7 @@ void send_IPI_mask_sequence(cpumask_t ma * should be modified to do 1 message per cluster ID - mbligh */ - local_irq_save(flags); + local_irq_save_hw(flags); for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { if (cpu_isset(query_cpu, mask)) { @@ -227,7 +232,7 @@ void send_IPI_mask_sequence(cpumask_t ma apic_write_around(APIC_ICR, cfg); } } - local_irq_restore(flags); + local_irq_restore_hw(flags); } #include /* must come after the send_IPI functions above for inlining */ @@ -311,7 +316,9 @@ static inline void leave_mm (unsigned lo fastcall void smp_invalidate_interrupt(struct pt_regs *regs) { - unsigned long cpu; + unsigned long cpu, flags; + + local_irq_save_hw_cond(flags); cpu = get_cpu(); @@ -341,6 +348,7 @@ fastcall void smp_invalidate_interrupt(s smp_mb__after_clear_bit(); out: put_cpu_no_resched(); + local_irq_restore_hw_cond(flags); } static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, @@ -401,14 +409,19 @@ void flush_tlb_current_task(void) { struct mm_struct *mm = current->mm; cpumask_t cpu_mask; + unsigned long flags; preempt_disable(); + local_irq_save_hw_cond(flags); + cpu_mask = mm->cpu_vm_mask; cpu_clear(smp_processor_id(), cpu_mask); local_flush_tlb(); if (!cpus_empty(cpu_mask)) flush_tlb_others(cpu_mask, mm, FLUSH_ALL); + + local_irq_restore_hw_cond(flags); preempt_enable(); } @@ -436,8 +449,11 @@ void flush_tlb_page(struct vm_area_struc { struct mm_struct *mm = vma->vm_mm; cpumask_t cpu_mask; + unsigned long flags; preempt_disable(); + local_irq_save_hw_cond(flags); + cpu_mask = mm->cpu_vm_mask; cpu_clear(smp_processor_id(), cpu_mask); @@ -448,6 +464,8 @@ void flush_tlb_page(struct vm_area_struc leave_mm(smp_processor_id()); } + local_irq_restore_hw_cond(flags); + if (!cpus_empty(cpu_mask)) 
flush_tlb_others(cpu_mask, mm, va); Index: linux-2.6.16.33/arch/i386/kernel/smpboot.c =================================================================== --- linux-2.6.16.33.orig/arch/i386/kernel/smpboot.c +++ linux-2.6.16.33/arch/i386/kernel/smpboot.c @@ -899,6 +899,7 @@ static int __devinit do_boot_cpu(int api unsigned short nmi_high = 0, nmi_low = 0; ++cpucount; + ipipe_note_apicid(apicid, cpu); /* * We can't use kernel_thread since we must avoid to @@ -1125,6 +1126,7 @@ static void __init smp_boot_cpus(unsigne boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); boot_cpu_logical_apicid = logical_smp_processor_id(); x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; + ipipe_note_apicid(boot_cpu_physical_apicid,0); current_thread_info()->cpu = 0; smp_tune_scheduling(); Index: linux-2.6.16.33/arch/i386/kernel/time.c =================================================================== --- linux-2.6.16.33.orig/arch/i386/kernel/time.c +++ linux-2.6.16.33/arch/i386/kernel/time.c @@ -255,11 +255,12 @@ static inline void do_timer_interrupt(in * This will also deassert NMI lines for the watchdog if run * on an 82489DX-based system. */ - spin_lock(&i8259A_lock); + unsigned long flags; + spin_lock_irqsave_hw_cond(&i8259A_lock,flags); outb(0x0c, PIC_MASTER_OCW3); /* Ack the IRQ; AEOI will end it automatically. */ inb(PIC_MASTER_POLL); - spin_unlock(&i8259A_lock); + spin_unlock_irqrestore_hw_cond(&i8259A_lock,flags); } #endif Index: linux-2.6.16.33/arch/i386/kernel/timers/timer_pit.c =================================================================== --- linux-2.6.16.33.orig/arch/i386/kernel/timers/timer_pit.c +++ linux-2.6.16.33/arch/i386/kernel/timers/timer_pit.c @@ -99,6 +99,10 @@ static unsigned long get_offset_pit(void */ unsigned long jiffies_t; +#ifdef CONFIG_IPIPE + if (!__ipipe_pipeline_head_p(ipipe_root_domain)) + return 0; /* We don't really own the PIT. 
*/ +#endif /* CONFIG_IPIPE */ spin_lock_irqsave(&i8253_lock, flags); /* timer count may underflow right here */ outb_p(0x00, PIT_MODE); /* latch the count ASAP */ Index: linux-2.6.16.33/arch/i386/kernel/timers/timer_tsc.c =================================================================== --- linux-2.6.16.33.orig/arch/i386/kernel/timers/timer_tsc.c +++ linux-2.6.16.33/arch/i386/kernel/timers/timer_tsc.c @@ -389,6 +389,20 @@ static void mark_offset_tsc(void) rdtsc(last_tsc_low, last_tsc_high); +#ifdef CONFIG_IPIPE + if (!__ipipe_pipeline_head_p(ipipe_root_domain)) { + /* If Linux does not actually own the timer, clock + ticks will be posted by some higher level domain to + us, and we expect it to do this right and never + lose any of them; so we just need to update the + monotonic base here. */ + this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low; + monotonic_base += cycles_2_ns(this_offset - last_offset); + write_sequnlock(&monotonic_lock); + return; + } +#endif /* CONFIG_IPIPE */ + spin_lock(&i8253_lock); outb_p(0x00, PIT_MODE); /* latch the count ASAP */ Index: linux-2.6.16.33/arch/i386/kernel/traps.c =================================================================== --- linux-2.6.16.33.orig/arch/i386/kernel/traps.c +++ linux-2.6.16.33/arch/i386/kernel/traps.c @@ -95,6 +95,9 @@ static int kstack_depth_to_print = 24; struct notifier_block *i386die_chain; static DEFINE_SPINLOCK(die_notifier_lock); +void (*nmi_watchdog_tick) (struct pt_regs * regs); +EXPORT_SYMBOL(nmi_watchdog_tick); + int register_die_notifier(struct notifier_block *nb) { int err = 0; @@ -252,6 +255,11 @@ void show_registers(struct pt_regs *regs regs->esi, regs->edi, regs->ebp, esp); printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", regs->xds & 0xffff, regs->xes & 0xffff, ss); +#ifdef CONFIG_IPIPE + if (ipipe_current_domain != ipipe_root_domain) + printk("I-pipe domain %s",ipipe_current_domain->name); + else +#endif /* CONFIG_IPIPE */ printk(KERN_EMERG "Process %s (pid: %d, 
threadinfo=%p task=%p)", current->comm, current->pid, current_thread_info(), current); /* @@ -630,13 +638,18 @@ void die_nmi (struct pt_regs *regs, cons do_exit(SIGSEGV); } +EXPORT_SYMBOL(die_nmi); static void default_do_nmi(struct pt_regs * regs) { unsigned char reason = 0; /* Only the BSP gets external NMIs from the system. */ +#ifdef CONFIG_IPIPE + if (!ipipe_processor_id()) +#else /* !CONFIG_IPIPE */ if (!smp_processor_id()) +#endif /* !CONFIG_IPIPE */ reason = get_nmi_reason(); if (!(reason & 0xc0)) { @@ -682,7 +695,11 @@ fastcall void do_nmi(struct pt_regs * re nmi_enter(); +#ifdef CONFIG_IPIPE + cpu = ipipe_processor_id(); +#else /* !CONFIG_IPIPE */ cpu = smp_processor_id(); +#endif /* !CONFIG_IPIPE */ ++nmi_count(cpu); @@ -1009,12 +1026,15 @@ asmlinkage void math_state_restore(struc { struct thread_info *thread = current_thread_info(); struct task_struct *tsk = thread->task; + unsigned long flags; + local_irq_save_hw_cond(flags); clts(); /* Allow maths ops (or we recurse) */ if (!tsk_used_math(tsk)) init_fpu(tsk); restore_fpu(tsk); thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ + local_irq_restore_hw_cond(flags); } #ifndef CONFIG_MATH_EMULATION Index: linux-2.6.16.33/arch/i386/lib/mmx.c =================================================================== --- linux-2.6.16.33.orig/arch/i386/lib/mmx.c +++ linux-2.6.16.33/arch/i386/lib/mmx.c @@ -32,7 +32,7 @@ void *_mmx_memcpy(void *to, const void * void *p; int i; - if (unlikely(in_interrupt())) + if (unlikely(!ipipe_root_domain_p || in_interrupt())) return __memcpy(to, from, len); p = to; Index: linux-2.6.16.33/arch/i386/mach-visws/visws_apic.c =================================================================== --- linux-2.6.16.33.orig/arch/i386/mach-visws/visws_apic.c +++ linux-2.6.16.33/arch/i386/mach-visws/visws_apic.c @@ -198,7 +198,7 @@ static irqreturn_t piix4_master_intr(int irq_desc_t *desc; unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + 
spin_lock_irqsave_hw(&i8259A_lock, flags); /* Find out what's interrupting in the PIIX4 master 8259 */ outb(0x0c, 0x20); /* OCW3 Poll command */ @@ -235,7 +235,7 @@ static irqreturn_t piix4_master_intr(int outb(0x60 + realirq, 0x20); } - spin_unlock_irqrestore(&i8259A_lock, flags); + spin_unlock_irqrestore_hw(&i8259A_lock, flags); desc = irq_desc + realirq; @@ -253,7 +253,7 @@ static irqreturn_t piix4_master_intr(int return IRQ_HANDLED; out_unlock: - spin_unlock_irqrestore(&i8259A_lock, flags); + spin_unlock_irqrestore_hw(&i8259A_lock, flags); return IRQ_NONE; } Index: linux-2.6.16.33/arch/i386/mm/fault.c =================================================================== --- linux-2.6.16.33.orig/arch/i386/mm/fault.c +++ linux-2.6.16.33/arch/i386/mm/fault.c @@ -237,6 +237,8 @@ fastcall void __kprobes do_page_fault(st /* get the address */ address = read_cr2(); + local_irq_enable_hw_cond(); + if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, SIGSEGV) == NOTIFY_STOP) return; Index: linux-2.6.16.33/arch/i386/mm/ioremap.c =================================================================== --- linux-2.6.16.33.orig/arch/i386/mm/ioremap.c +++ linux-2.6.16.33/arch/i386/mm/ioremap.c @@ -17,6 +17,7 @@ #include #include #include +#include #define ISA_START_ADDRESS 0xa0000 #define ISA_END_ADDRESS 0x100000 @@ -92,6 +93,7 @@ static int ioremap_page_range(unsigned l err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags); if (err) break; + set_pgdir(addr, *pgd); } while (pgd++, addr = next, addr != end); flush_tlb_all(); return err; Index: linux-2.6.16.33/drivers/pci/msi.c =================================================================== --- linux-2.6.16.33.orig/drivers/pci/msi.c +++ linux-2.6.16.33/drivers/pci/msi.c @@ -151,6 +151,21 @@ static void unmask_MSI_irq(unsigned int msi_set_mask_bit(vector, 0); } +#ifdef CONFIG_IPIPE +static void ack_MSI_irq_w_maskbit(unsigned int vector) +{ + mask_MSI_irq(vector); + __ack_APIC_irq(); +} +static void 
ack_MSI_irq_wo_maskbit(unsigned int vector) +{ + __ack_APIC_irq(); +} +#else /* !CONFIG_IPIPE */ +#define ack_MSI_irq_wo_maskbit do_nothing +#define ack_MSI_irq_w_maskbit mask_MSI_irq +#endif /* CONFIG_IPIPE */ + static unsigned int startup_msi_irq_wo_maskbit(unsigned int vector) { struct msi_desc *entry; @@ -214,7 +229,7 @@ static struct hw_interrupt_type msix_irq .shutdown = shutdown_msi_irq, .enable = unmask_MSI_irq, .disable = mask_MSI_irq, - .ack = mask_MSI_irq, + .ack = ack_MSI_irq_w_maskbit, .end = end_msi_irq_w_maskbit, .set_affinity = set_msi_affinity }; @@ -230,7 +245,7 @@ static struct hw_interrupt_type msi_irq_ .shutdown = shutdown_msi_irq, .enable = unmask_MSI_irq, .disable = mask_MSI_irq, - .ack = mask_MSI_irq, + .ack = ack_MSI_irq_w_maskbit, .end = end_msi_irq_w_maskbit, .set_affinity = set_msi_affinity }; @@ -246,7 +261,7 @@ static struct hw_interrupt_type msi_irq_ .shutdown = shutdown_msi_irq, .enable = do_nothing, .disable = do_nothing, - .ack = do_nothing, + .ack = ack_MSI_irq_wo_maskbit, .end = end_msi_irq_wo_maskbit, .set_affinity = set_msi_affinity }; Index: linux-2.6.16.33/include/asm-i386/apic.h =================================================================== --- linux-2.6.16.33.orig/include/asm-i386/apic.h +++ linux-2.6.16.33/include/asm-i386/apic.h @@ -82,7 +82,13 @@ int get_physical_broadcast(void); # define apic_write_around(x,y) apic_write_atomic((x),(y)) #endif +#ifdef CONFIG_IPIPE +#define ack_APIC_irq() do { } while(0) +static inline void __ack_APIC_irq(void) +#else /* !CONFIG_IPIPE */ +#define __ack_APIC_irq() ack_APIC_irq() static inline void ack_APIC_irq(void) +#endif /* CONFIG_IPIPE */ { /* * ack_APIC_irq() actually gets compiled as a single instruction: @@ -117,7 +123,7 @@ extern int reserve_lapic_nmi(void); extern void release_lapic_nmi(void); extern void disable_timer_nmi_watchdog(void); extern void enable_timer_nmi_watchdog(void); -extern void nmi_watchdog_tick (struct pt_regs * regs); +extern void (*nmi_watchdog_tick) 
(struct pt_regs * regs); extern int APIC_init_uniprocessor (void); extern void disable_APIC_timer(void); extern void enable_APIC_timer(void); Index: linux-2.6.16.33/include/asm-i386/io_apic.h =================================================================== --- linux-2.6.16.33.orig/include/asm-i386/io_apic.h +++ linux-2.6.16.33/include/asm-i386/io_apic.h @@ -16,7 +16,9 @@ #ifdef CONFIG_PCI_MSI static inline int use_pci_vector(void) {return 1;} static inline void disable_edge_ioapic_vector(unsigned int vector) { } +#ifndef CONFIG_IPIPE static inline void mask_and_ack_level_ioapic_vector(unsigned int vector) { } +#endif /* !CONFIG_IPIPE */ static inline void end_edge_ioapic_vector (unsigned int vector) { } #define startup_level_ioapic startup_level_ioapic_vector #define shutdown_level_ioapic mask_IO_APIC_vector @@ -35,8 +37,14 @@ static inline void end_edge_ioapic_vecto #else static inline int use_pci_vector(void) {return 0;} static inline void disable_edge_ioapic_irq(unsigned int irq) { } +#ifdef CONFIG_IPIPE +#ifndef CONFIG_IRQBALANCE +static inline void end_edge_ioapic_irq (unsigned int irq) { } +#endif /* !CONFIG_IRQBALANCE */ +#else /* !CONFIG_IPIPE */ static inline void mask_and_ack_level_ioapic_irq(unsigned int irq) { } static inline void end_edge_ioapic_irq (unsigned int irq) { } +#endif /* CONFIG_IPIPE */ #define startup_level_ioapic startup_level_ioapic_irq #define shutdown_level_ioapic mask_IO_APIC_irq #define enable_level_ioapic unmask_IO_APIC_irq Index: linux-2.6.16.33/include/asm-i386/ipipe.h =================================================================== --- /dev/null +++ linux-2.6.16.33/include/asm-i386/ipipe.h @@ -0,0 +1,287 @@ +/* -*- linux-c -*- + * include/asm-i386/ipipe.h + * + * Copyright (C) 2002-2005 Philippe Gerum.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef __I386_IPIPE_H +#define __I386_IPIPE_H + +#include + +#ifdef CONFIG_IPIPE + +#include +#include + +#define IPIPE_ARCH_STRING "1.5-02" +#define IPIPE_MAJOR_NUMBER 1 +#define IPIPE_MINOR_NUMBER 5 +#define IPIPE_PATCH_NUMBER 2 + +#ifdef CONFIG_X86_LOCAL_APIC +/* We want to cover the whole IRQ space when the APIC is enabled. */ +#ifdef CONFIG_PCI_MSI +#define IPIPE_NR_XIRQS NR_IRQS +#else /* CONFIG_PCI_MSI */ +#define IPIPE_NR_XIRQS 224 +#endif /* CONFIG_PCI_MSI */ +/* If the APIC is enabled, then we expose four service vectors in the + APIC space which are freely available to domains. */ +#define IPIPE_SERVICE_VECTOR0 0xf5 +#define IPIPE_SERVICE_IPI0 (IPIPE_SERVICE_VECTOR0 - FIRST_EXTERNAL_VECTOR) +#define IPIPE_SERVICE_VECTOR1 0xf6 +#define IPIPE_SERVICE_IPI1 (IPIPE_SERVICE_VECTOR1 - FIRST_EXTERNAL_VECTOR) +#define IPIPE_SERVICE_VECTOR2 0xf7 +#define IPIPE_SERVICE_IPI2 (IPIPE_SERVICE_VECTOR2 - FIRST_EXTERNAL_VECTOR) +#define IPIPE_SERVICE_VECTOR3 0xf8 +#define IPIPE_SERVICE_IPI3 (IPIPE_SERVICE_VECTOR3 - FIRST_EXTERNAL_VECTOR) +#else /* !CONFIG_X86_LOCAL_APIC */ +#define IPIPE_NR_XIRQS NR_IRQS +#endif /* CONFIG_X86_LOCAL_APIC */ + +#define IPIPE_IRQ_ISHIFT 5 /* 2^5 for 32bits arch. 
*/ +#define NR_XIRQS IPIPE_NR_XIRQS + +#define ex_do_divide_error 0 +#define ex_do_debug 1 +/* NMI not pipelined. */ +#define ex_do_int3 3 +#define ex_do_overflow 4 +#define ex_do_bounds 5 +#define ex_do_invalid_op 6 +#define ex_device_not_available 7 +/* Double fault not pipelined. */ +#define ex_do_coprocessor_segment_overrun 9 +#define ex_do_invalid_TSS 10 +#define ex_do_segment_not_present 11 +#define ex_do_stack_segment 12 +#define ex_do_general_protection 13 +#define ex_do_page_fault 14 +#define ex_do_spurious_interrupt_bug 15 +#define ex_do_coprocessor_error 16 +#define ex_do_alignment_check 17 +#define ex_machine_check_vector 18 +#define ex_do_simd_coprocessor_error 19 +#define ex_do_iret_error 32 + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include + +#ifdef CONFIG_SMP + +#include +#include +#include +#include + +#define IPIPE_CRITICAL_VECTOR 0xf9 /* Used by ipipe_critical_enter/exit() */ +#define IPIPE_CRITICAL_IPI (IPIPE_CRITICAL_VECTOR - FIRST_EXTERNAL_VECTOR) + +extern int (*__ipipe_logical_cpuid)(void); + +#define ipipe_processor_id() __ipipe_logical_cpuid() + +extern u8 __ipipe_apicid_2_cpu[]; + +#define ipipe_note_apicid(apicid,cpu) \ +do { \ + __ipipe_apicid_2_cpu[apicid] = cpu; \ +} while(0) + +#else /* !CONFIG_SMP */ + +#define ipipe_note_apicid(apicid,cpu) do { } while(0) +#define ipipe_processor_id() 0 + +#endif /* CONFIG_SMP */ + +#define prepare_arch_switch(next) \ +do { \ + ipipe_schedule_notify(current, next); \ + local_irq_disable_hw(); \ +} while(0) + +#define task_hijacked(p) \ + ({ int x = ipipe_current_domain != ipipe_root_domain; \ + __clear_bit(IPIPE_SYNC_FLAG,&ipipe_root_domain->cpudata[task_cpu(p)].status); \ + local_irq_enable_hw(); x; }) + +/* IDT fault vectors */ +#define IPIPE_NR_FAULTS 33 /* 32 from IDT + iret_error */ +/* Pseudo-vectors used for kernel events */ +#define IPIPE_FIRST_EVENT IPIPE_NR_FAULTS +#define IPIPE_EVENT_SYSCALL (IPIPE_FIRST_EVENT) +#define IPIPE_EVENT_SCHEDULE (IPIPE_FIRST_EVENT + 1) 
+#define IPIPE_EVENT_SIGWAKE (IPIPE_FIRST_EVENT + 2) +#define IPIPE_EVENT_SETSCHED (IPIPE_FIRST_EVENT + 3) +#define IPIPE_EVENT_INIT (IPIPE_FIRST_EVENT + 4) +#define IPIPE_EVENT_EXIT (IPIPE_FIRST_EVENT + 5) +#define IPIPE_EVENT_CLEANUP (IPIPE_FIRST_EVENT + 6) +#define IPIPE_LAST_EVENT IPIPE_EVENT_CLEANUP +#define IPIPE_NR_EVENTS (IPIPE_LAST_EVENT + 1) + +struct ipipe_domain; + +struct ipipe_sysinfo { + + int ncpus; /* Number of CPUs on board */ + u64 cpufreq; /* CPU frequency (in Hz) */ + + /* Arch-dependent block */ + + struct { + unsigned tmirq; /* Timer tick IRQ */ + u64 tmfreq; /* Timer frequency */ + } archdep; +}; + +#define ipipe_read_tsc(t) __asm__ __volatile__("rdtsc" : "=A" (t)) +#define ipipe_cpu_freq() ({ unsigned long long __freq = cpu_has_tsc?(1000LL * cpu_khz):CLOCK_TICK_RATE; __freq; }) + +#define ipipe_tsc2ns(t) \ +({ \ + unsigned long long delta = (t)*1000; \ + do_div(delta, cpu_khz/1000+1); \ + (unsigned long)delta; \ +}) + +#define ipipe_tsc2us(t) \ +({ \ + unsigned long long delta = (t); \ + do_div(delta, cpu_khz/1000+1); \ + (unsigned long)delta; \ +}) + +/* Private interface -- Internal use only */ + +#define __ipipe_check_platform() do { } while(0) + +#define __ipipe_init_platform() do { } while(0) + +#define __ipipe_enable_irq(irq) irq_desc[irq].handler->enable(irq) + +#define __ipipe_disable_irq(irq) irq_desc[irq].handler->disable(irq) + +void __ipipe_enable_irqdesc(unsigned irq); + +void __ipipe_enable_pipeline(void); + +int __ipipe_ack_system_irq(unsigned irq); + +int __ipipe_handle_irq(struct pt_regs regs); + +void __ipipe_do_critical_sync(unsigned irq, void *cookie); + +extern struct pt_regs __ipipe_tick_regs[]; + +extern int __ipipe_tick_irq; + +#define __ipipe_call_root_xirq_handler(ipd,irq) \ + __asm__ __volatile__ ("pushfl\n\t" \ + "pushl %%cs\n\t" \ + "pushl $1f\n\t" \ + "pushl %%eax\n\t" \ + "pushl %%es\n\t" \ + "pushl %%ds\n\t" \ + "pushl %%eax\n\t" \ + "pushl %%ebp\n\t" \ + "pushl %%edi\n\t" \ + "pushl %%esi\n\t" \ + "pushl 
%%edx\n\t" \ + "pushl %%ecx\n\t" \ + "pushl %%ebx\n\t" \ + "movl %%esp,%%eax\n\t" \ + "call *%1\n\t" \ + "jmp ret_from_intr\n\t" \ + "1:\n" \ + : /* no output */ \ + : "a" (irq-256), "m" ((ipd)->irqs[irq].handler)) + +#define __ipipe_call_root_virq_handler(ipd,irq) \ + __asm__ __volatile__ ("pushfl\n\t" \ + "pushl %%cs\n\t" \ + "pushl $1f\n\t" \ + "pushl $-1\n\t" \ + "pushl %%es\n\t" \ + "pushl %%ds\n\t" \ + "pushl %%eax\n\t" \ + "pushl %%ebp\n\t" \ + "pushl %%edi\n\t" \ + "pushl %%esi\n\t" \ + "pushl %%edx\n\t" \ + "pushl %%ecx\n\t" \ + "pushl %%ebx\n\t" \ + "pushl %2\n\t" \ + "pushl %%eax\n\t" \ + "call *%1\n\t" \ + "addl $8,%%esp\n\t" \ + "jmp ret_from_intr\n\t" \ + "1:\n" \ + : /* no output */ \ + : "a" (irq), "m" ((ipd)->irqs[irq].handler), "r" ((ipd)->irqs[irq].cookie)) + +static inline unsigned long __ipipe_ffnz(unsigned long ul) +{ + __asm__("bsrl %1, %0":"=r"(ul) + : "r"(ul)); + return ul; +} + +/* When running handlers, enable hw interrupts for all domains but the + * one heading the pipeline, so that IRQs can never be significantly + * deferred for the latter. 
*/ +#define __ipipe_run_isr(ipd, irq, cpuid) \ +do { \ + local_irq_enable_nohead(ipd); \ + if (ipd == ipipe_root_domain) { \ + if (likely(!ipipe_virtual_irq_p(irq))) { \ + __ipipe_call_root_xirq_handler(ipd,irq); \ + } else { \ + irq_enter(); \ + __ipipe_call_root_virq_handler(ipd,irq); \ + irq_exit(); \ + } \ + } else { \ + __clear_bit(IPIPE_SYNC_FLAG, &cpudata->status); \ + ipd->irqs[irq].handler(irq, ipd->irqs[irq].cookie); \ + __set_bit(IPIPE_SYNC_FLAG, &cpudata->status); \ + } \ + local_irq_disable_nohead(ipd); \ +} while(0) + +#endif /* __ASSEMBLY__ */ + +#define __ipipe_syscall_watched_p(p, sc) \ + (((p)->flags & PF_EVNOTIFY) || (unsigned long)sc >= NR_syscalls) + +#else /* !CONFIG_IPIPE */ + +#define task_hijacked(p) 0 + +#define NR_XIRQS NR_IRQS + +#define ipipe_note_apicid(apicid,cpu) do { } while(0) + +#endif /* CONFIG_IPIPE */ + +#endif /* !__I386_IPIPE_H */ Index: linux-2.6.16.33/include/asm-i386/mach-default/do_timer.h =================================================================== --- linux-2.6.16.33.orig/include/asm-i386/mach-default/do_timer.h +++ linux-2.6.16.33/include/asm-i386/mach-default/do_timer.h @@ -50,14 +50,15 @@ static inline void do_timer_interrupt_ho static inline int do_timer_overflow(int count) { int i; + unsigned long flags; - spin_lock(&i8259A_lock); + spin_lock_irqsave_hw(&i8259A_lock, flags); /* * This is tricky when I/O APICs are used; * see do_timer_interrupt(). 
*/ i = inb(0x20); - spin_unlock(&i8259A_lock); + spin_unlock_irqrestore_hw(&i8259A_lock, flags); /* assumption about timer being IRQ0 */ if (i & 0x01) { Index: linux-2.6.16.33/include/asm-i386/mach-visws/do_timer.h =================================================================== --- linux-2.6.16.33.orig/include/asm-i386/mach-visws/do_timer.h +++ linux-2.6.16.33/include/asm-i386/mach-visws/do_timer.h @@ -29,14 +29,15 @@ static inline void do_timer_interrupt_ho static inline int do_timer_overflow(int count) { int i; + unsigned long flags; - spin_lock(&i8259A_lock); + spin_lock_irqsave_hw(&i8259A_lock, flags); /* * This is tricky when I/O APICs are used; * see do_timer_interrupt(). */ i = inb(0x20); - spin_unlock(&i8259A_lock); + spin_unlock_irqrestore_hw(&i8259A_lock, flags); /* assumption about timer being IRQ0 */ if (i & 0x01) { Index: linux-2.6.16.33/include/asm-i386/mmu_context.h =================================================================== --- linux-2.6.16.33.orig/include/asm-i386/mmu_context.h +++ linux-2.6.16.33/include/asm-i386/mmu_context.h @@ -17,7 +17,7 @@ void destroy_context(struct mm_struct *m static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { #ifdef CONFIG_SMP - unsigned cpu = smp_processor_id(); + unsigned cpu = smp_processor_id_hw(); if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_LAZY; #endif @@ -27,7 +27,7 @@ static inline void switch_mm(struct mm_s struct mm_struct *next, struct task_struct *tsk) { - int cpu = smp_processor_id(); + int cpu = smp_processor_id_hw(); if (likely(prev != next)) { /* stop flush ipis for the previous mm */ @@ -67,6 +67,11 @@ static inline void switch_mm(struct mm_s asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0)) #define activate_mm(prev, next) \ - switch_mm((prev),(next),NULL) +do { \ + unsigned long flags; \ + local_irq_save_hw_cond(flags); \ + switch_mm((prev),(next),NULL); \ + local_irq_restore_hw_cond(flags); \ +} while(0) #endif 
Index: linux-2.6.16.33/include/asm-i386/nmi.h =================================================================== --- linux-2.6.16.33.orig/include/asm-i386/nmi.h +++ linux-2.6.16.33/include/asm-i386/nmi.h @@ -25,4 +25,6 @@ void set_nmi_callback(nmi_callback_t cal */ void unset_nmi_callback(void); +void die_nmi(struct pt_regs *, const char *msg); + #endif /* ASM_NMI_H */ Index: linux-2.6.16.33/include/asm-i386/pgalloc.h =================================================================== --- linux-2.6.16.33.orig/include/asm-i386/pgalloc.h +++ linux-2.6.16.33/include/asm-i386/pgalloc.h @@ -47,4 +47,27 @@ static inline void pte_free(struct page #define check_pgt_cache() do { } while (0) +static inline void set_pgdir(unsigned long address, pgd_t entry) +{ +#ifdef CONFIG_IPIPE + struct task_struct * p; + struct page *page; + pgd_t *pgd; + + read_lock(&tasklist_lock); + + for_each_process(p) { + if(p->mm) + *pgd_offset(p->mm,address) = entry; + } + + read_unlock(&tasklist_lock); + + for (page = pgd_list; page; page = (struct page *)page->index) { + pgd = (pgd_t *)page_address(page); + pgd[address >> PGDIR_SHIFT] = entry; + } +#endif /* CONFIG_IPIPE */ +} + #endif /* _I386_PGALLOC_H */ Index: linux-2.6.16.33/include/asm-i386/spinlock.h =================================================================== --- linux-2.6.16.33.orig/include/asm-i386/spinlock.h +++ linux-2.6.16.33/include/asm-i386/spinlock.h @@ -32,6 +32,9 @@ "jmp 1b\n" \ "3:\n\t" +#ifdef CONFIG_IPIPE +#define __raw_spin_lock_string_flags __raw_spin_lock_string +#else /* !CONFIG_IPIPE */ #define __raw_spin_lock_string_flags \ "\n1:\t" \ "lock ; decb %0\n\t" \ @@ -47,6 +50,7 @@ "cli\n\t" \ "jmp 1b\n" \ "4:\n\t" +#endif /* CONFIG_IPIPE */ static inline void __raw_spin_lock(raw_spinlock_t *lock) { Index: linux-2.6.16.33/include/asm-i386/system.h =================================================================== --- linux-2.6.16.33.orig/include/asm-i386/system.h +++ linux-2.6.16.33/include/asm-i386/system.h @@ 
-519,6 +519,109 @@ struct alt_instr { #define set_wmb(var, value) do { var = value; wmb(); } while (0) /* interrupt control.. */ +#ifdef CONFIG_IPIPE + +#include + +void __ipipe_stall_root(void); + +void __ipipe_unstall_root(void); + +unsigned long __ipipe_test_root(void); + +unsigned long __ipipe_test_and_stall_root(void); + +void fastcall __ipipe_restore_root(unsigned long flags); + +#define local_save_flags(x) ((x) = (!__ipipe_test_root()) << 9) +#define local_irq_save(x) ((x) = (!__ipipe_test_and_stall_root()) << 9) +#define local_irq_restore(x) __ipipe_restore_root(!(x & 0x200)) +#define local_irq_disable() __ipipe_stall_root() +#define local_irq_enable() __ipipe_unstall_root() + +#define irqs_disabled() __ipipe_test_root() + +#define halt() __asm__ __volatile__("hlt": : :"memory") + +#ifdef CONFIG_IPIPE_TRACE_IRQSOFF + +#include + +#define safe_halt() do { \ + __ipipe_unstall_root(); \ + ipipe_trace_end(0x8000000E); \ + __asm__ __volatile__("sti; hlt": : :"memory"); \ +} while(0) + +#define ipipe_hw_save_flags_and_sti(x) do { \ + ipipe_trace_end(0x8000000F); \ + __asm__ __volatile__("pushfl ; popl %0 ; sti":"=g" (x): /* no input */ :"memory"); \ +} while (0) +#define local_irq_disable_hw() do { \ + if (!irqs_disabled_hw()) { \ + __asm__ __volatile__("cli": : :"memory"); \ + ipipe_trace_begin(0x80000000); \ + } \ +} while (0) +#define local_irq_enable_hw() do { \ + if (irqs_disabled_hw()) { \ + ipipe_trace_end(0x80000000); \ + __asm__ __volatile__("sti": : :"memory"); \ + } \ +} while (0) +#define local_irq_save_hw(x) do { \ + local_save_flags_hw(x); \ + if (local_test_iflag_hw(x)) { \ + __asm__ __volatile__("cli": : :"memory"); \ + ipipe_trace_begin(0x80000001); \ + } \ +} while (0) +#define local_irq_restore_hw(x) do { \ + if (local_test_iflag_hw(x)) \ + ipipe_trace_end(0x80000001); \ + __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); \ +} while (0) + +#define local_irq_disable_hw_notrace() \ + __asm__ __volatile__("cli": : 
:"memory") +#define local_irq_enable_hw_notrace() \ + __asm__ __volatile__("sti": : :"memory") +#define local_irq_save_hw_notrace(x) \ + __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory") +#define local_irq_restore_hw_notrace(x) \ + __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc") + +#else /* !CONFIG_IPIPE_TRACE_IRQSOFF */ + +#define safe_halt() do { \ + __ipipe_unstall_root(); \ + __asm__ __volatile__("sti; hlt": : :"memory"); \ +} while(0) + +#define ipipe_hw_save_flags_and_sti(x) __asm__ __volatile__("pushfl ; popl %0 ; sti":"=g" (x): /* no input */ :"memory") +#define local_irq_disable_hw() __asm__ __volatile__("cli": : :"memory") +#define local_irq_enable_hw() __asm__ __volatile__("sti": : :"memory") +#define local_irq_save_hw(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory") +#define local_irq_restore_hw(x) __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc") + +#define local_irq_disable_hw_notrace local_irq_disable_hw +#define local_irq_enable_hw_notrace local_irq_enable_hw +#define local_irq_save_hw_notrace local_irq_save_hw +#define local_irq_restore_hw_notrace local_irq_restore_hw + +#endif /* CONFIG_IPIPE_TRACE_IRQSOFF */ + +#define local_save_flags_hw(x) __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */) +#define local_test_iflag_hw(x) ((x) & (1<<9)) +#define irqs_disabled_hw() \ +({ \ + unsigned long flags; \ + local_save_flags_hw(flags); \ + !local_test_iflag_hw(flags); \ +}) + +#else /* !CONFIG_IPIPE */ + #define local_save_flags(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0) #define local_irq_restore(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0) #define local_irq_disable() __asm__ __volatile__("cli": : :"memory") @@ -538,6 +641,13 @@ struct alt_instr { /* For 
spinlocks etc */ #define local_irq_save(x) __asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory") +#define local_irq_save_hw(flags) local_irq_save(flags) +#define local_irq_restore_hw(flags) local_irq_restore(flags) +#define local_irq_enable_hw() local_irq_enable() +#define local_irq_disable_hw(flags) local_irq_disable() + +#endif /* CONFIG_IPIPE */ + /* * disable hlt during certain critical i/o operations */ Index: linux-2.6.16.33/include/linux/hardirq.h =================================================================== --- linux-2.6.16.33.orig/include/linux/hardirq.h +++ linux-2.6.16.33/include/linux/hardirq.h @@ -87,8 +87,21 @@ extern void synchronize_irq(unsigned int # define synchronize_irq(irq) barrier() #endif +#ifdef CONFIG_IPIPE +#define nmi_enter() \ +do { \ + if (ipipe_current_domain == ipipe_root_domain) \ + irq_enter(); \ +} while(0) +#define nmi_exit() \ +do { \ + if (ipipe_current_domain == ipipe_root_domain) \ + sub_preempt_count(HARDIRQ_OFFSET); \ +} while(0) +#else /* !CONFIG_IPIPE */ #define nmi_enter() irq_enter() #define nmi_exit() sub_preempt_count(HARDIRQ_OFFSET) +#endif /* CONFIG_IPIPE */ struct task_struct; Index: linux-2.6.16.33/include/linux/ipipe.h =================================================================== --- /dev/null +++ linux-2.6.16.33/include/linux/ipipe.h @@ -0,0 +1,770 @@ +/* -*- linux-c -*- + * include/linux/ipipe.h + * + * Copyright (C) 2002-2005 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef __LINUX_IPIPE_H +#define __LINUX_IPIPE_H + +#include +#include +#include +#include +#include + +#ifdef CONFIG_IPIPE + +#define IPIPE_VERSION_STRING IPIPE_ARCH_STRING +#define IPIPE_RELEASE_NUMBER ((IPIPE_MAJOR_NUMBER << 16) | \ + (IPIPE_MINOR_NUMBER << 8) | \ + (IPIPE_PATCH_NUMBER)) + +#ifndef BROKEN_BUILTIN_RETURN_ADDRESS +#define __BUILTIN_RETURN_ADDRESS0 ((unsigned long)__builtin_return_address(0)) +#define __BUILTIN_RETURN_ADDRESS1 ((unsigned long)__builtin_return_address(1)) +#endif /* !BROKEN_BUILTIN_RETURN_ADDRESS */ + +#define IPIPE_ROOT_PRIO 100 +#define IPIPE_ROOT_ID 0 +#define IPIPE_ROOT_NPTDKEYS 4 /* Must be <= BITS_PER_LONG */ + +#define IPIPE_RESET_TIMER 0x1 +#define IPIPE_GRAB_TIMER 0x2 + +/* Global domain flags */ +#define IPIPE_SPRINTK_FLAG 0 /* Synchronous printk() allowed */ +#define IPIPE_AHEAD_FLAG 1 /* Domain always heads the pipeline */ + +/* Per-cpu pipeline status */ +#define IPIPE_STALL_FLAG 0 /* Stalls a pipeline stage -- guaranteed at bit #0 */ +#define IPIPE_SYNC_FLAG 1 /* The interrupt syncer is running for the domain */ +#define IPIPE_NOSTACK_FLAG 2 /* Domain currently runs on a foreign stack */ + +#define IPIPE_SYNC_MASK (1 << IPIPE_SYNC_FLAG) + +/* Interrupt control bits */ +#define IPIPE_HANDLE_FLAG 0 +#define IPIPE_PASS_FLAG 1 +#define IPIPE_ENABLE_FLAG 2 +#define IPIPE_DYNAMIC_FLAG IPIPE_HANDLE_FLAG +#define IPIPE_STICKY_FLAG 3 +#define IPIPE_SYSTEM_FLAG 4 +#define IPIPE_LOCK_FLAG 5 +#define IPIPE_SHARED_FLAG 6 +#define IPIPE_WIRED_FLAG 7 +#define IPIPE_EXCLUSIVE_FLAG 8 + +#define IPIPE_HANDLE_MASK (1 << IPIPE_HANDLE_FLAG) +#define IPIPE_PASS_MASK (1 << IPIPE_PASS_FLAG) +#define IPIPE_ENABLE_MASK (1 << IPIPE_ENABLE_FLAG) +#define IPIPE_DYNAMIC_MASK
IPIPE_HANDLE_MASK +#define IPIPE_STICKY_MASK (1 << IPIPE_STICKY_FLAG) +#define IPIPE_SYSTEM_MASK (1 << IPIPE_SYSTEM_FLAG) +#define IPIPE_LOCK_MASK (1 << IPIPE_LOCK_FLAG) +#define IPIPE_SHARED_MASK (1 << IPIPE_SHARED_FLAG) +#define IPIPE_WIRED_MASK (1 << IPIPE_WIRED_FLAG) +#define IPIPE_EXCLUSIVE_MASK (1 << IPIPE_EXCLUSIVE_FLAG) + +#define IPIPE_DEFAULT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK) +#define IPIPE_STDROOT_MASK (IPIPE_HANDLE_MASK|IPIPE_PASS_MASK|IPIPE_SYSTEM_MASK) + +#define IPIPE_EVENT_SELF 0x80000000 + +/* Number of virtual IRQs */ +#define IPIPE_NR_VIRQS BITS_PER_LONG +/* First virtual IRQ # */ +#define IPIPE_VIRQ_BASE (((IPIPE_NR_XIRQS + BITS_PER_LONG - 1) / BITS_PER_LONG) * BITS_PER_LONG) +/* Total number of IRQ slots */ +#define IPIPE_NR_IRQS (IPIPE_VIRQ_BASE + IPIPE_NR_VIRQS) +/* Number of indirect words needed to map the whole IRQ space. */ +#define IPIPE_IRQ_IWORDS ((IPIPE_NR_IRQS + BITS_PER_LONG - 1) / BITS_PER_LONG) +#define IPIPE_IRQ_IMASK (BITS_PER_LONG - 1) +#define IPIPE_IRQMASK_ANY (~0L) +#define IPIPE_IRQMASK_VIRT (IPIPE_IRQMASK_ANY << (IPIPE_VIRQ_BASE / BITS_PER_LONG)) + +#ifdef CONFIG_SMP + +#define IPIPE_NR_CPUS NR_CPUS +#define ipipe_declare_cpuid int cpuid +#define ipipe_load_cpuid() do { \ + (cpuid) = ipipe_processor_id(); \ + } while(0) +#define ipipe_lock_cpu(flags) do { \ + local_irq_save_hw(flags); \ + (cpuid) = ipipe_processor_id(); \ + } while(0) +#define ipipe_unlock_cpu(flags) local_irq_restore_hw(flags) +#define ipipe_get_cpu(flags) ipipe_lock_cpu(flags) +#define ipipe_put_cpu(flags) ipipe_unlock_cpu(flags) +#define ipipe_current_domain per_cpu(ipipe_percpu_domain, ipipe_processor_id()) + +#else /* !CONFIG_SMP */ + +#define IPIPE_NR_CPUS 1 +#define ipipe_declare_cpuid const int cpuid = 0 +#define ipipe_load_cpuid() do { } while(0) +#define ipipe_lock_cpu(flags) local_irq_save_hw(flags) +#define ipipe_unlock_cpu(flags) local_irq_restore_hw(flags) +#define ipipe_get_cpu(flags) do { (void)(flags); } while(0) +#define 
ipipe_put_cpu(flags) do { } while(0) +#define ipipe_current_domain per_cpu(ipipe_percpu_domain, 0) + +#endif /* CONFIG_SMP */ + +#define ipipe_virtual_irq_p(irq) ((irq) >= IPIPE_VIRQ_BASE && \ + (irq) < IPIPE_NR_IRQS) + +typedef void (*ipipe_irq_handler_t)(unsigned irq, + void *cookie); + +#define IPIPE_SAME_HANDLER ((ipipe_irq_handler_t)(-1)) + +typedef int (*ipipe_irq_ackfn_t)(unsigned irq); + +typedef int (*ipipe_event_handler_t)(unsigned event, + struct ipipe_domain *from, + void *data); +struct ipipe_domain { + + struct list_head p_link; /* Link in pipeline */ + + struct ipcpudata { + unsigned long status; + unsigned long irq_pending_hi; + unsigned long irq_pending_lo[IPIPE_IRQ_IWORDS]; + struct ipirqcnt { + unsigned long pending_hits; + unsigned long total_hits; + } irq_counters[IPIPE_NR_IRQS]; + unsigned long long evsync; + } ____cacheline_aligned_in_smp cpudata[IPIPE_NR_CPUS]; + + struct { + unsigned long control; + ipipe_irq_ackfn_t acknowledge; + ipipe_irq_handler_t handler; + void *cookie; + } ____cacheline_aligned irqs[IPIPE_NR_IRQS]; + + ipipe_event_handler_t evhand[IPIPE_NR_EVENTS]; /* Event handlers. */ + unsigned long long evself; /* Self-monitored event bits. */ + unsigned long flags; + unsigned domid; + const char *name; + int priority; + void *pdd; +}; + +#define IPIPE_HEAD_PRIORITY (-1) /* For domains always heading the pipeline */ + +struct ipipe_domain_attr { + + unsigned domid; /* Domain identifier -- Magic value set by caller */ + const char *name; /* Domain name -- Warning: won't be dup'ed! */ + int priority; /* Priority in interrupt pipeline */ + void (*entry) (void); /* Domain entry point */ + void *pdd; /* Per-domain (opaque) data pointer */ +}; + +/* The following macros must be used hw interrupts off. 
*/ + +#define __ipipe_irq_cookie(ipd,irq) (ipd)->irqs[irq].cookie +#define __ipipe_irq_handler(ipd,irq) (ipd)->irqs[irq].handler + +#define __ipipe_cpudata_irq_hits(ipd,cpuid,irq) ((ipd)->cpudata[cpuid].irq_counters[irq].total_hits) + +#define __ipipe_set_irq_bit(ipd,cpuid,irq) \ +do { \ + if (!test_bit(IPIPE_LOCK_FLAG,&(ipd)->irqs[irq].control)) { \ + __set_bit(irq & IPIPE_IRQ_IMASK,&(ipd)->cpudata[cpuid].irq_pending_lo[irq >> IPIPE_IRQ_ISHIFT]); \ + __set_bit(irq >> IPIPE_IRQ_ISHIFT,&(ipd)->cpudata[cpuid].irq_pending_hi); \ + } \ +} while(0) + +#define __ipipe_clear_pend(ipd,cpuid,irq) \ +do { \ + __clear_bit(irq & IPIPE_IRQ_IMASK,&(ipd)->cpudata[cpuid].irq_pending_lo[irq >> IPIPE_IRQ_ISHIFT]); \ + if ((ipd)->cpudata[cpuid].irq_pending_lo[irq >> IPIPE_IRQ_ISHIFT] == 0) \ + __clear_bit(irq >> IPIPE_IRQ_ISHIFT,&(ipd)->cpudata[cpuid].irq_pending_hi); \ +} while(0) + +#define __ipipe_lock_irq(ipd,cpuid,irq) \ +do { \ + if (!test_and_set_bit(IPIPE_LOCK_FLAG,&(ipd)->irqs[irq].control)) \ + __ipipe_clear_pend(ipd,cpuid,irq); \ +} while(0) + +#define __ipipe_unlock_irq(ipd,irq) \ +do { \ + int __cpuid, __nr_cpus = num_online_cpus(); \ + if (test_and_clear_bit(IPIPE_LOCK_FLAG,&(ipd)->irqs[irq].control)) \ + for (__cpuid = 0; __cpuid < __nr_cpus; __cpuid++) \ + if ((ipd)->cpudata[__cpuid].irq_counters[irq].pending_hits > 0) { /* We need atomic ops next. 
*/ \ + set_bit(irq & IPIPE_IRQ_IMASK,&(ipd)->cpudata[__cpuid].irq_pending_lo[irq >> IPIPE_IRQ_ISHIFT]); \ + set_bit(irq >> IPIPE_IRQ_ISHIFT,&(ipd)->cpudata[__cpuid].irq_pending_hi); \ + } \ +} while(0) + +#define __ipipe_clear_irq(ipd,irq) \ +do { \ + int __cpuid, __nr_cpus = num_online_cpus(); \ + clear_bit(IPIPE_LOCK_FLAG,&(ipd)->irqs[irq].control); \ + for (__cpuid = 0; __cpuid < __nr_cpus; __cpuid++) { \ + (ipd)->cpudata[__cpuid].irq_counters[irq].pending_hits = 0; \ + __ipipe_clear_pend(ipd,__cpuid,irq); \ + } \ +} while(0) + +#ifdef __RAW_SPIN_LOCK_UNLOCKED +#define spin_lock_hw(x) __raw_spin_lock(&(x)->raw_lock) +#define spin_trylock_hw(x) __raw_spin_trylock(&(x)->raw_lock) +#define spin_unlock_hw(x) __raw_spin_unlock(&(x)->raw_lock) +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +#define write_lock_hw(x) __raw_write_lock(&(x)->raw_lock) +#define write_trylock_hw(x) __raw_write_trylock(&(x)->raw_lock) +#define write_unlock_hw(x) __raw_write_unlock(&(x)->raw_lock) +#define read_lock_hw(x) __raw_read_lock(&(x)->raw_lock) +#define read_trylock_hw(x) __raw_read_trylock(&(x)->raw_lock) +#define read_unlock_hw(x) __raw_read_unlock(&(x)->raw_lock) +#else /* UP non-debug */ +#define write_lock_hw(lock) do { (void)(lock); } while (0) +#define write_trylock_hw(lock) ({ (void)(lock); 1; }) +#define write_unlock_hw(lock) do { (void)(lock); } while (0) +#define read_lock_hw(lock) do { (void)(lock); } while (0) +#define read_trylock_hw(lock) ({ (void)(lock); 1; }) +#define read_unlock_hw(lock) do { (void)(lock); } while (0) +#endif /* CONFIG_SMP || CONFIG_DEBUG_SPINLOCK */ +#else /* !__RAW_SPIN_LOCK_UNLOCKED */ +#define spin_lock_hw(x) _raw_spin_lock(x) +#define spin_unlock_hw(x) _raw_spin_unlock(x) +#define spin_trylock_hw(x) _raw_spin_trylock(x) +#define write_lock_hw(x) _raw_write_lock(x) +#define write_unlock_hw(x) _raw_write_unlock(x) +#define write_trylock_hw(x) _raw_write_trylock(x) +#define read_lock_hw(x) _raw_read_lock(x) +#define read_unlock_hw(x) 
_raw_read_unlock(x) +#endif /* __RAW_SPIN_LOCK_UNLOCKED */ + +typedef spinlock_t ipipe_spinlock_t; +typedef rwlock_t ipipe_rwlock_t; +#define IPIPE_SPIN_LOCK_UNLOCKED SPIN_LOCK_UNLOCKED +#define IPIPE_RW_LOCK_UNLOCKED RW_LOCK_UNLOCKED + +#define spin_lock_irqsave_hw(x,flags) \ +do { \ + local_irq_save_hw(flags); \ + spin_lock_hw(x); \ +} while (0) + +#define spin_unlock_irqrestore_hw(x,flags) \ +do { \ + spin_unlock_hw(x); \ + local_irq_restore_hw(flags); \ +} while (0) + +#define spin_lock_irq_hw(x) \ +do { \ + local_irq_disable_hw(); \ + spin_lock_hw(x); \ +} while (0) + +#define spin_unlock_irq_hw(x) \ +do { \ + spin_unlock_hw(x); \ + local_irq_enable_hw(); \ +} while (0) + +#define read_lock_irqsave_hw(lock, flags) \ +do { \ + local_irq_save_hw(flags); \ + read_lock_hw(lock); \ +} while (0) + +#define read_unlock_irqrestore_hw(lock, flags) \ +do { \ + read_unlock_hw(lock); \ + local_irq_restore_hw(flags); \ +} while (0) + +#define write_lock_irqsave_hw(lock, flags) \ +do { \ + local_irq_save_hw(flags); \ + write_lock_hw(lock); \ +} while (0) + +#define write_unlock_irqrestore_hw(lock, flags) \ +do { \ + write_unlock_hw(lock); \ + local_irq_restore_hw(flags); \ +} while (0) + +DECLARE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain); + +extern struct ipipe_domain ipipe_root; + +#define ipipe_root_domain (&ipipe_root) + +extern unsigned __ipipe_printk_virq; + +extern unsigned long __ipipe_virtual_irq_map; + +extern struct list_head __ipipe_pipeline; + +extern ipipe_spinlock_t __ipipe_pipelock; + +extern int __ipipe_event_monitors[]; + +/* Private interface */ + +void ipipe_init(void); + +#ifdef CONFIG_PROC_FS +void ipipe_init_proc(void); + +#ifdef CONFIG_IPIPE_TRACE +void __ipipe_init_tracer(void); +#else /* !CONFIG_IPIPE_TRACE */ +#define __ipipe_init_tracer() do { } while(0) +#endif /* CONFIG_IPIPE_TRACE */ + +#else /* !CONFIG_PROC_FS */ +#define ipipe_init_proc() do { } while(0) +#endif /* CONFIG_PROC_FS */ + +void __ipipe_init_stage(struct ipipe_domain 
*ipd); + +void __ipipe_cleanup_domain(struct ipipe_domain *ipd); + +void __ipipe_add_domain_proc(struct ipipe_domain *ipd); + +void __ipipe_remove_domain_proc(struct ipipe_domain *ipd); + +void __ipipe_flush_printk(unsigned irq, void *cookie); + +void __ipipe_stall_root(void); + +void __ipipe_unstall_root(void); + +unsigned long __ipipe_test_root(void); + +unsigned long __ipipe_test_and_stall_root(void); + +void fastcall __ipipe_walk_pipeline(struct list_head *pos, int cpuid); + +void fastcall __ipipe_restore_root(unsigned long x); + +int fastcall __ipipe_schedule_irq(unsigned irq, struct list_head *head); + +int fastcall __ipipe_dispatch_event(unsigned event, void *data); + +int fastcall __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq); + +void fastcall __ipipe_sync_stage(unsigned long syncmask); + +#ifndef __ipipe_sync_pipeline +#define __ipipe_sync_pipeline(syncmask) __ipipe_sync_stage(syncmask) +#endif + +#ifndef __ipipe_run_irqtail +#define __ipipe_run_irqtail() do { } while(0) +#endif + +#define __ipipe_pipeline_head_p(ipd) (&(ipd)->p_link == __ipipe_pipeline.next) + +/* + * Keep the following as a macro, so that client code could check for + * the support of the invariant pipeline head optimization. + */ +#define __ipipe_pipeline_head() list_entry(__ipipe_pipeline.next,struct ipipe_domain,p_link) + +#define __ipipe_event_monitored_p(ev) \ + (__ipipe_event_monitors[ev] > 0 || (ipipe_current_domain->evself & (1LL << ev))) + +#ifdef CONFIG_SMP + +cpumask_t __ipipe_set_irq_affinity(unsigned irq, + cpumask_t cpumask); + +int fastcall __ipipe_send_ipi(unsigned ipi, + cpumask_t cpumask); + +#endif /* CONFIG_SMP */ + +/* Called with hw interrupts off. */ +static inline void __ipipe_switch_to(struct ipipe_domain *out, + struct ipipe_domain *in, int cpuid) +{ + void ipipe_suspend_domain(void); + + /* + * "in" is guaranteed to be closer than "out" from the head of the + * pipeline (and obviously different). 
+ */ + + out->cpudata[cpuid].evsync = 0; + per_cpu(ipipe_percpu_domain, cpuid) = in; + + ipipe_suspend_domain(); /* Sync stage and propagate interrupts. */ + ipipe_load_cpuid(); /* Processor might have changed. */ + + if (per_cpu(ipipe_percpu_domain, cpuid) == in) + /* + * Otherwise, something has changed the current domain under + * our feet recycling the register set; do not override. + */ + per_cpu(ipipe_percpu_domain, cpuid) = out; +} + +#define ipipe_sigwake_notify(p) \ +do { \ + if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_SIGWAKE)) \ + __ipipe_dispatch_event(IPIPE_EVENT_SIGWAKE,p); \ +} while(0) + +#define ipipe_exit_notify(p) \ +do { \ + if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_EXIT)) \ + __ipipe_dispatch_event(IPIPE_EVENT_EXIT,p); \ +} while(0) + +#define ipipe_setsched_notify(p) \ +do { \ + if (((p)->flags & PF_EVNOTIFY) && __ipipe_event_monitored_p(IPIPE_EVENT_SETSCHED)) \ + __ipipe_dispatch_event(IPIPE_EVENT_SETSCHED,p); \ +} while(0) + +#define ipipe_schedule_notify(prev, next) \ +do { \ + if ((((prev)->flags|(next)->flags) & PF_EVNOTIFY) && \ + __ipipe_event_monitored_p(IPIPE_EVENT_SCHEDULE)) \ + __ipipe_dispatch_event(IPIPE_EVENT_SCHEDULE,next); \ +} while(0) + +#define ipipe_trap_notify(ex, regs) \ +({ \ + ipipe_declare_cpuid; \ + int ret = 0; \ + ipipe_load_cpuid(); \ + if ((test_bit(IPIPE_NOSTACK_FLAG, &ipipe_current_domain->cpudata[cpuid].status) || \ + ((current)->flags & PF_EVNOTIFY)) && \ + __ipipe_event_monitored_p(ex)) \ + ret = __ipipe_dispatch_event(ex, regs); \ + ret; \ +}) + +static inline void ipipe_init_notify(struct task_struct *p) +{ + if (__ipipe_event_monitored_p(IPIPE_EVENT_INIT)) + __ipipe_dispatch_event(IPIPE_EVENT_INIT,p); +} + +struct mm_struct; + +static inline void ipipe_cleanup_notify(struct mm_struct *mm) +{ + if (__ipipe_event_monitored_p(IPIPE_EVENT_CLEANUP)) + __ipipe_dispatch_event(IPIPE_EVENT_CLEANUP,mm); +} + +/* Public interface */ + +int 
ipipe_register_domain(struct ipipe_domain *ipd, + struct ipipe_domain_attr *attr); + +int ipipe_unregister_domain(struct ipipe_domain *ipd); + +void ipipe_suspend_domain(void); + +int ipipe_virtualize_irq(struct ipipe_domain *ipd, + unsigned irq, + ipipe_irq_handler_t handler, + void *cookie, + ipipe_irq_ackfn_t acknowledge, + unsigned modemask); + +static inline int ipipe_share_irq(unsigned irq, + ipipe_irq_ackfn_t acknowledge) +{ + return ipipe_virtualize_irq(ipipe_current_domain, + irq, + IPIPE_SAME_HANDLER, + NULL, + acknowledge, + IPIPE_SHARED_MASK | IPIPE_HANDLE_MASK | + IPIPE_PASS_MASK); +} + +int ipipe_control_irq(unsigned irq, + unsigned clrmask, + unsigned setmask); + +unsigned ipipe_alloc_virq(void); + +int ipipe_free_virq(unsigned virq); + +int fastcall ipipe_trigger_irq(unsigned irq); + +static inline int ipipe_propagate_irq(unsigned irq) +{ + return __ipipe_schedule_irq(irq, ipipe_current_domain->p_link.next); +} + +static inline int ipipe_schedule_irq(unsigned irq) +{ + return __ipipe_schedule_irq(irq, &ipipe_current_domain->p_link); +} + +void fastcall ipipe_stall_pipeline_from(struct ipipe_domain *ipd); + +unsigned long fastcall ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd); + +void fastcall ipipe_unstall_pipeline_from(struct ipipe_domain *ipd); + +unsigned long fastcall ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd); + +void fastcall ipipe_restore_pipeline_from(struct ipipe_domain *ipd, + unsigned long x); + +static inline unsigned long ipipe_test_pipeline_from(struct ipipe_domain *ipd) +{ + unsigned long flags, x; + ipipe_declare_cpuid; + + ipipe_get_cpu(flags); + x = test_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + ipipe_put_cpu(flags); + + return x; +} + +static inline void ipipe_restore_pipeline_nosync(struct ipipe_domain *ipd, + unsigned long x, int cpuid) +{ + /* + * If cpuid is current, then it must be held on entry + * (ipipe_get_cpu/local_irq_save_hw/local_irq_disable_hw). 
+ */ + + if (x) + __set_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + else + __clear_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); +} + +static inline void ipipe_stall_pipeline_head(void) +{ + ipipe_declare_cpuid; + unsigned long flags; + + ipipe_lock_cpu(flags); + __set_bit(IPIPE_STALL_FLAG, &__ipipe_pipeline_head()->cpudata[cpuid].status); +} + +static inline unsigned long ipipe_test_and_stall_pipeline_head(void) +{ + unsigned long flags; + ipipe_declare_cpuid; + + ipipe_lock_cpu(flags); + return __test_and_set_bit(IPIPE_STALL_FLAG, &__ipipe_pipeline_head()->cpudata[cpuid].status); +} + +void ipipe_unstall_pipeline_head(void); + +void fastcall __ipipe_restore_pipeline_head(struct ipipe_domain *head, + unsigned long x); + +static inline void ipipe_restore_pipeline_head(unsigned long x) +{ + struct ipipe_domain *head = __ipipe_pipeline_head(); + /* On some archs, __test_and_set_bit() might return different + * truth value than test_bit(), so we test the exclusive OR of + * both statuses, assuming that the lowest bit is always set in + * the truth value (if this is wrong, the failed optimization will + * be caught in __ipipe_restore_pipeline_head() if + * CONFIG_DEBUG_KERNEL is set). 
*/ + if ((x ^ test_bit(IPIPE_STALL_FLAG, &head->cpudata[ipipe_processor_id()].status)) & 1) + __ipipe_restore_pipeline_head(head,x); +} + +#define ipipe_unstall_pipeline() \ + ipipe_unstall_pipeline_from(ipipe_current_domain) + +#define ipipe_test_and_unstall_pipeline() \ + ipipe_test_and_unstall_pipeline_from(ipipe_current_domain) + +#define ipipe_test_pipeline() \ + ipipe_test_pipeline_from(ipipe_current_domain) + +#define ipipe_test_and_stall_pipeline() \ + ipipe_test_and_stall_pipeline_from(ipipe_current_domain) + +#define ipipe_stall_pipeline() \ + ipipe_stall_pipeline_from(ipipe_current_domain) + +#define ipipe_restore_pipeline(x) \ + ipipe_restore_pipeline_from(ipipe_current_domain, (x)) + +void ipipe_init_attr(struct ipipe_domain_attr *attr); + +int ipipe_get_sysinfo(struct ipipe_sysinfo *sysinfo); + +int ipipe_tune_timer(unsigned long ns, + int flags); + +unsigned long ipipe_critical_enter(void (*syncfn) (void)); + +void ipipe_critical_exit(unsigned long flags); + +static inline void ipipe_set_printk_sync(struct ipipe_domain *ipd) +{ + set_bit(IPIPE_SPRINTK_FLAG, &ipd->flags); +} + +static inline void ipipe_set_printk_async(struct ipipe_domain *ipd) +{ + clear_bit(IPIPE_SPRINTK_FLAG, &ipd->flags); +} + +static inline void ipipe_set_foreign_stack(struct ipipe_domain *ipd) +{ + /* Must be called hw interrupts off. */ + ipipe_declare_cpuid; + ipipe_load_cpuid(); + __set_bit(IPIPE_NOSTACK_FLAG, &ipd->cpudata[cpuid].status); +} + +static inline void ipipe_clear_foreign_stack(struct ipipe_domain *ipd) +{ + /* Must be called hw interrupts off. */ + ipipe_declare_cpuid; + ipipe_load_cpuid(); + __clear_bit(IPIPE_NOSTACK_FLAG, &ipd->cpudata[cpuid].status); +} + +#define ipipe_safe_current() \ +({ \ + ipipe_declare_cpuid; \ + struct task_struct *p; \ + ipipe_load_cpuid(); \ + p = test_bit(IPIPE_NOSTACK_FLAG, \ + &per_cpu(ipipe_percpu_domain, cpuid)->cpudata[cpuid].status) ? 
&init_task : current; \ + p; \ +}) + +ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd, + unsigned event, + ipipe_event_handler_t handler); + +cpumask_t ipipe_set_irq_affinity(unsigned irq, + cpumask_t cpumask); + +int fastcall ipipe_send_ipi(unsigned ipi, + cpumask_t cpumask); + +int ipipe_setscheduler_root(struct task_struct *p, + int policy, + int prio); + +int ipipe_reenter_root(struct task_struct *prev, + int policy, + int prio); + +int ipipe_alloc_ptdkey(void); + +int ipipe_free_ptdkey(int key); + +int fastcall ipipe_set_ptd(int key, + void *value); + +void fastcall *ipipe_get_ptd(int key); + +#define local_irq_enable_hw_cond() local_irq_enable_hw() +#define local_irq_disable_hw_cond() local_irq_disable_hw() +#define local_irq_save_hw_cond(flags) local_irq_save_hw(flags) +#define local_irq_restore_hw_cond(flags) local_irq_restore_hw(flags) + +#define local_irq_enable_nohead(ipd) \ + do { \ + if (!__ipipe_pipeline_head_p(ipd)) \ + local_irq_enable_hw(); \ + } while(0) + +#define local_irq_disable_nohead(ipd) \ + do { \ + if (!__ipipe_pipeline_head_p(ipd)) \ + local_irq_disable_hw(); \ + } while(0) + +#define spin_lock_irqsave_hw_cond(lock,flags) spin_lock_irqsave_hw(lock,flags) +#define spin_unlock_irqrestore_hw_cond(lock,flags) spin_unlock_irqrestore_hw(lock,flags) +#define smp_processor_id_hw() ipipe_processor_id() + +#define ipipe_irq_lock(irq) \ + do { \ + ipipe_declare_cpuid; \ + ipipe_load_cpuid(); \ + __ipipe_lock_irq(per_cpu(ipipe_percpu_domain, cpuid), cpuid, irq);\ + } while(0) + +#define ipipe_irq_unlock(irq) \ + do { \ + ipipe_declare_cpuid; \ + ipipe_load_cpuid(); \ + __ipipe_unlock_irq(per_cpu(ipipe_percpu_domain, cpuid), irq); \ + } while(0) + +#define ipipe_root_domain_p (ipipe_current_domain == ipipe_root_domain) + +#else /* !CONFIG_IPIPE */ + +#define ipipe_init() do { } while(0) +#define ipipe_suspend_domain() do { } while(0) +#define ipipe_sigwake_notify(p) do { } while(0) +#define ipipe_setsched_notify(p) do { } while(0) 
+#define ipipe_init_notify(p) do { } while(0) +#define ipipe_exit_notify(p) do { } while(0) +#define ipipe_cleanup_notify(mm) do { } while(0) +#define ipipe_trap_notify(t,r) 0 +#define ipipe_init_proc() do { } while(0) + +#define spin_lock_hw(lock) spin_lock(lock) +#define spin_unlock_hw(lock) spin_unlock(lock) +#define spin_lock_irq_hw(lock) spin_lock_irq(lock) +#define spin_unlock_irq_hw(lock) spin_unlock_irq(lock) +#define spin_lock_irqsave_hw(lock,flags) spin_lock_irqsave(lock, flags) +#define spin_unlock_irqrestore_hw(lock,flags) spin_unlock_irqrestore(lock, flags) + +#define local_irq_enable_hw_cond() do { } while(0) +#define local_irq_disable_hw_cond() do { } while(0) +#define local_irq_save_hw_cond(flags) do { (void)(flags); } while(0) +#define local_irq_restore_hw_cond(flags) do { } while(0) +#define spin_lock_irqsave_hw_cond(lock,flags) do { (void)(flags); spin_lock(lock); } while(0) +#define spin_unlock_irqrestore_hw_cond(lock,flags) spin_unlock(lock) +#define smp_processor_id_hw() smp_processor_id() + +#define ipipe_irq_lock(irq) do { } while(0) +#define ipipe_irq_unlock(irq) do { } while(0) + +#define ipipe_root_domain_p 1 +#define ipipe_safe_current current + +#endif /* CONFIG_IPIPE */ + +#endif /* !__LINUX_IPIPE_H */ Index: linux-2.6.16.33/include/linux/ipipe_trace.h =================================================================== --- /dev/null +++ linux-2.6.16.33/include/linux/ipipe_trace.h @@ -0,0 +1,40 @@ +/* -*- linux-c -*- + * include/linux/ipipe_trace.h + * + * Copyright (C) 2005 Luotao Fu. + * 2005, 2006 Jan Kiszka. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef _LINUX_IPIPE_TRACE_H +#define _LINUX_IPIPE_TRACE_H + +#include + +void ipipe_trace_begin(unsigned long v); +void ipipe_trace_end(unsigned long v); +void ipipe_trace_freeze(unsigned long v); +void ipipe_trace_special(unsigned char special_id, unsigned long v); +void ipipe_trace_pid(pid_t pid, short prio); + +int ipipe_trace_max_reset(void); +int ipipe_trace_frozen_reset(void); + +void ipipe_trace_panic_freeze(void); +void ipipe_trace_panic_dump(void); + +#endif /* !_LINUX_IPIPE_TRACE_H */ Index: linux-2.6.16.33/include/linux/linkage.h =================================================================== --- linux-2.6.16.33.orig/include/linux/linkage.h +++ linux-2.6.16.33/include/linux/linkage.h @@ -51,4 +51,8 @@ #define fastcall #endif +#ifndef notrace +#define notrace __attribute__((no_instrument_function)) +#endif + #endif Index: linux-2.6.16.33/include/linux/preempt.h =================================================================== --- linux-2.6.16.33.orig/include/linux/preempt.h +++ linux-2.6.16.33/include/linux/preempt.h @@ -27,29 +27,44 @@ asmlinkage void preempt_schedule(void); -#define preempt_disable() \ -do { \ - inc_preempt_count(); \ - barrier(); \ +#ifdef CONFIG_IPIPE +#include +DECLARE_PER_CPU(struct ipipe_domain *, ipipe_percpu_domain); +extern struct ipipe_domain ipipe_root; +#define ipipe_preempt_guard() (per_cpu(ipipe_percpu_domain, ipipe_processor_id()) == &ipipe_root) +#else /* !CONFIG_IPIPE */ +#define ipipe_preempt_guard() 1 +#endif /* CONFIG_IPIPE */ + +#define
preempt_disable() \ +do { \ + if (ipipe_preempt_guard()) { \ + inc_preempt_count(); \ + barrier(); \ + } \ } while (0) -#define preempt_enable_no_resched() \ -do { \ - barrier(); \ - dec_preempt_count(); \ +#define preempt_enable_no_resched() \ +do { \ + if (ipipe_preempt_guard()) { \ + barrier(); \ + dec_preempt_count(); \ + } \ } while (0) -#define preempt_check_resched() \ -do { \ - if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \ - preempt_schedule(); \ +#define preempt_check_resched() \ +do { \ + if (ipipe_preempt_guard()) { \ + if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \ + preempt_schedule(); \ + } \ } while (0) -#define preempt_enable() \ -do { \ - preempt_enable_no_resched(); \ +#define preempt_enable() \ +do { \ + preempt_enable_no_resched(); \ barrier(); \ - preempt_check_resched(); \ + preempt_check_resched(); \ } while (0) #else Index: linux-2.6.16.33/include/linux/sched.h =================================================================== --- linux-2.6.16.33.orig/include/linux/sched.h +++ linux-2.6.16.33/include/linux/sched.h @@ -4,6 +4,7 @@ #include /* for HZ */ #include +#include #include #include #include @@ -129,6 +130,13 @@ extern unsigned long nr_iowait(void); #define EXIT_DEAD 32 /* in tsk->state again */ #define TASK_NONINTERACTIVE 64 +#ifdef CONFIG_IPIPE +#define TASK_ATOMICSWITCH 512 +#define TASK_NOWAKEUP 1024 +#else /* !CONFIG_IPIPE */ +#define TASK_ATOMICSWITCH 0 +#define TASK_NOWAKEUP 0 +#endif /* CONFIG_IPIPE */ #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -871,6 +879,9 @@ struct task_struct { #endif atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; +#ifdef CONFIG_IPIPE + void *ptd[IPIPE_ROOT_NPTDKEYS]; +#endif }; static inline pid_t process_group(struct task_struct *tsk) @@ -928,6 +939,11 @@ static inline void put_task_struct(struc #define PF_BORROWED_MM 0x00400000 /* I am a kthread doing use_mm */ #define PF_RANDOMIZE 0x00800000 /* randomize virtual 
address space */ #define PF_SWAPWRITE 0x01000000 /* Allowed to write to swap */ +#ifdef CONFIG_IPIPE +#define PF_EVNOTIFY 0x40000000 /* Notify other domains about internal events */ +#else +#define PF_EVNOTIFY 0 +#endif /* CONFIG_IPIPE */ /* * Only the _current_ task can read/write to tsk->flags, but other Index: linux-2.6.16.33/init/Kconfig =================================================================== --- linux-2.6.16.33.orig/init/Kconfig +++ linux-2.6.16.33/init/Kconfig @@ -58,6 +58,7 @@ menu "General setup" config LOCALVERSION string "Local version - append to kernel release" + default "-ipipe" help Append an extra string to the end of your kernel version. This will show up when you type uname, for example. Index: linux-2.6.16.33/init/main.c =================================================================== --- linux-2.6.16.33.orig/init/main.c +++ linux-2.6.16.33/init/main.c @@ -486,6 +486,11 @@ asmlinkage void __init start_kernel(void hrtimers_init(); softirq_init(); time_init(); + /* + * We need to wait for the interrupt and time subsystems to be + * initialized before enabling the pipeline. + */ + ipipe_init(); /* * HACK ALERT! This is early. 
We're enabling the console before @@ -611,6 +616,7 @@ static void __init do_basic_setup(void) #ifdef CONFIG_SYSCTL sysctl_init(); #endif + ipipe_init_proc(); do_initcalls(); } Index: linux-2.6.16.33/kernel/Makefile =================================================================== --- linux-2.6.16.33.orig/kernel/Makefile +++ linux-2.6.16.33/kernel/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_DETECT_SOFTLOCKUP) += softl obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o +obj-$(CONFIG_IPIPE) += ipipe/ ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is Index: linux-2.6.16.33/kernel/exit.c =================================================================== --- linux-2.6.16.33.orig/kernel/exit.c +++ linux-2.6.16.33/kernel/exit.c @@ -844,6 +844,7 @@ fastcall NORET_TYPE void do_exit(long co exit_itimers(tsk->signal); acct_process(code); } + ipipe_exit_notify(tsk); exit_mm(tsk); exit_sem(tsk); Index: linux-2.6.16.33/kernel/fork.c =================================================================== --- linux-2.6.16.33.orig/kernel/fork.c +++ linux-2.6.16.33/kernel/fork.c @@ -371,6 +371,7 @@ void fastcall __mmdrop(struct mm_struct void mmput(struct mm_struct *mm) { if (atomic_dec_and_test(&mm->mm_users)) { + ipipe_cleanup_notify(mm); exit_aio(mm); exit_mmap(mm); if (!list_empty(&mm->mmlist)) { @@ -885,7 +886,7 @@ static inline void copy_flags(unsigned l { unsigned long new_flags = p->flags; - new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE); + new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE | PF_EVNOTIFY); new_flags |= PF_FORKNOEXEC; if (!(clone_flags & CLONE_PTRACE)) p->ptrace = 0; @@ -1198,6 +1199,14 @@ static task_t *copy_process(unsigned lon spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); proc_fork_connector(p); +#ifdef CONFIG_IPIPE + { + int k; + + for (k = 0; k < IPIPE_ROOT_NPTDKEYS; k++) + p->ptd[k] = NULL; + } +#endif /* CONFIG_IPIPE */
return p; bad_fork_cleanup_namespace: Index: linux-2.6.16.33/kernel/ipipe/Kconfig =================================================================== --- /dev/null +++ linux-2.6.16.33/kernel/ipipe/Kconfig @@ -0,0 +1,6 @@ +config IPIPE + bool "Interrupt pipeline" + default y + ---help--- + Activate this option if you want the interrupt pipeline to be + compiled in. Index: linux-2.6.16.33/kernel/ipipe/Kconfig.debug =================================================================== --- /dev/null +++ linux-2.6.16.33/kernel/ipipe/Kconfig.debug @@ -0,0 +1,69 @@ +config IPIPE_DEBUG + bool "I-pipe debugging" + depends on IPIPE + +config IPIPE_TRACE + bool "Latency tracing" + depends on IPIPE_DEBUG + select FRAME_POINTER + select KALLSYMS + select PROC_FS + ---help--- + Activate this option if you want to use per-function tracing of + the kernel. The tracer will collect data via instrumentation + features like the one below or with the help of explicit calls + of ipipe_trace_xxx(). See include/linux/ipipe_trace.h for the + in-kernel tracing API. The collected data and runtime control + are available via /proc/ipipe/trace/*. + +config IPIPE_TRACE_ENABLE + bool "Enable tracing on boot" + depends on IPIPE_TRACE + default y + ---help--- + Disable this option if you want to arm the tracer after booting + manually ("echo 1 > /proc/ipipe/trace/enable"). This can reduce + boot time on slow embedded devices due to the tracer overhead. + +config IPIPE_TRACE_MCOUNT + bool "Instrument function entries" + depends on IPIPE_TRACE + default y + ---help--- + When enabled, records every kernel function entry in the tracer + log. While this slows down the system noticeably, it provides + the highest level of information about the flow of events. + However, it can be switched off in order to record only explicit + I-pipe trace points.
+ +config IPIPE_TRACE_IRQSOFF + bool "Trace IRQs-off times" + depends on IPIPE_TRACE + default y + ---help--- + Activate this option if I-pipe shall trace the longest path + with hard-IRQs switched off. + +config IPIPE_TRACE_SHIFT + int "Depth of trace log (14 => 16Kpoints, 15 => 32Kpoints)" + range 10 18 + default 14 + depends on IPIPE_TRACE + ---help--- + The number of trace points to hold tracing data for each + trace path, as a power of 2. + +config IPIPE_TRACE_VMALLOC + bool "Use vmalloc'ed trace buffer" + depends on IPIPE_TRACE + ---help--- + Instead of reserving static kernel data, the required buffer + is allocated via vmalloc during boot-up when this option is + enabled. This can help to start systems that are low on memory, + but it slightly degrades overall performance. Try this option + when a traced kernel hangs unexpectedly at boot time. + +config IPIPE_TRACE_ENABLE_VALUE + int + default 0 if !IPIPE_TRACE_ENABLE + default 1 if IPIPE_TRACE_ENABLE Index: linux-2.6.16.33/kernel/ipipe/Makefile =================================================================== --- /dev/null +++ linux-2.6.16.33/kernel/ipipe/Makefile @@ -0,0 +1,3 @@ + +obj-$(CONFIG_IPIPE) += core.o generic.o +obj-$(CONFIG_IPIPE_TRACE) += tracer.o Index: linux-2.6.16.33/kernel/ipipe/core.c =================================================================== --- /dev/null +++ linux-2.6.16.33/kernel/ipipe/core.c @@ -0,0 +1,1051 @@ +/* -*- linux-c -*- + * linux/kernel/ipipe/core.c + * + * Copyright (C) 2002-2005 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Architecture-independent I-PIPE core support. + */ + +#include +#include +#include +#include +#ifdef CONFIG_PROC_FS +#include +#endif /* CONFIG_PROC_FS */ + +struct ipipe_domain ipipe_root = + { .cpudata = {[0 ... IPIPE_NR_CPUS-1] = + { .status = (1<name = "Linux"; + ipd->domid = IPIPE_ROOT_ID; + ipd->priority = IPIPE_ROOT_PRIO; + + __ipipe_init_stage(ipd); + + INIT_LIST_HEAD(&ipd->p_link); + list_add_tail(&ipd->p_link, &__ipipe_pipeline); + + __ipipe_init_platform(); + +#ifdef CONFIG_PRINTK + __ipipe_printk_virq = ipipe_alloc_virq(); /* Cannot fail here. 
*/ + ipd->irqs[__ipipe_printk_virq].handler = &__ipipe_flush_printk; + ipd->irqs[__ipipe_printk_virq].cookie = NULL; + ipd->irqs[__ipipe_printk_virq].acknowledge = NULL; + ipd->irqs[__ipipe_printk_virq].control = IPIPE_HANDLE_MASK; +#endif /* CONFIG_PRINTK */ + + __ipipe_enable_pipeline(); + + printk(KERN_INFO "I-pipe %s: pipeline enabled.\n", + IPIPE_VERSION_STRING); +} + +void __ipipe_init_stage(struct ipipe_domain *ipd) +{ + int cpuid, n; + + for (cpuid = 0; cpuid < IPIPE_NR_CPUS; cpuid++) { + ipd->cpudata[cpuid].irq_pending_hi = 0; + + for (n = 0; n < IPIPE_IRQ_IWORDS; n++) + ipd->cpudata[cpuid].irq_pending_lo[n] = 0; + + for (n = 0; n < IPIPE_NR_IRQS; n++) { + ipd->cpudata[cpuid].irq_counters[n].total_hits = 0; + ipd->cpudata[cpuid].irq_counters[n].pending_hits = 0; + } + + ipd->cpudata[cpuid].evsync = 0; + } + + for (n = 0; n < IPIPE_NR_IRQS; n++) { + ipd->irqs[n].acknowledge = NULL; + ipd->irqs[n].handler = NULL; + ipd->irqs[n].control = IPIPE_PASS_MASK; /* Pass but don't handle */ + } + + for (n = 0; n < IPIPE_NR_EVENTS; n++) + ipd->evhand[n] = NULL; + + ipd->evself = 0LL; + +#ifdef CONFIG_SMP + ipd->irqs[IPIPE_CRITICAL_IPI].acknowledge = &__ipipe_ack_system_irq; + ipd->irqs[IPIPE_CRITICAL_IPI].handler = &__ipipe_do_critical_sync; + ipd->irqs[IPIPE_CRITICAL_IPI].cookie = NULL; + /* Immediately handle in the current domain but *never* pass */ + ipd->irqs[IPIPE_CRITICAL_IPI].control = + IPIPE_HANDLE_MASK|IPIPE_STICKY_MASK|IPIPE_SYSTEM_MASK; +#endif /* CONFIG_SMP */ +} + +void __ipipe_stall_root(void) +{ + ipipe_declare_cpuid; + unsigned long flags; + + ipipe_get_cpu(flags); /* Care for migration. 
*/ + set_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status); + ipipe_put_cpu(flags); +} + +void __ipipe_cleanup_domain(struct ipipe_domain *ipd) +{ + ipipe_unstall_pipeline_from(ipd); + +#ifdef CONFIG_SMP + { + int cpu; + + for_each_online_cpu(cpu) { + while (ipd->cpudata[cpu].irq_pending_hi != 0) + cpu_relax(); + } + } +#endif /* CONFIG_SMP */ +} + +void __ipipe_unstall_root(void) +{ + ipipe_declare_cpuid; + + local_irq_disable_hw(); + + ipipe_load_cpuid(); + + __clear_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status); + + if (unlikely(ipipe_root_domain->cpudata[cpuid].irq_pending_hi != 0)) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + + local_irq_enable_hw(); +} + +unsigned long __ipipe_test_root(void) +{ + unsigned long flags, x; + ipipe_declare_cpuid; + + ipipe_get_cpu(flags); /* Care for migration. */ + x = test_bit(IPIPE_STALL_FLAG, &ipipe_root_domain->cpudata[cpuid].status); + ipipe_put_cpu(flags); + + return x; +} + +unsigned long __ipipe_test_and_stall_root(void) +{ + unsigned long flags, x; + ipipe_declare_cpuid; + + ipipe_get_cpu(flags); /* Care for migration. */ + x = test_and_set_bit(IPIPE_STALL_FLAG, + &ipipe_root_domain->cpudata[cpuid].status); + ipipe_put_cpu(flags); + + return x; +} + +void fastcall __ipipe_restore_root(unsigned long x) +{ + if (x) + __ipipe_stall_root(); + else + __ipipe_unstall_root(); +} + +void fastcall ipipe_stall_pipeline_from(struct ipipe_domain *ipd) +{ + ipipe_declare_cpuid; +#ifdef CONFIG_SMP + unsigned long flags; + + ipipe_lock_cpu(flags); /* Care for migration. 
*/ + + __set_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + + if (!__ipipe_pipeline_head_p(ipd)) + ipipe_unlock_cpu(flags); +#else /* CONFIG_SMP */ + set_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + + if (__ipipe_pipeline_head_p(ipd)) + local_irq_disable_hw(); +#endif /* CONFIG_SMP */ +} + +unsigned long fastcall ipipe_test_and_stall_pipeline_from(struct ipipe_domain *ipd) +{ + ipipe_declare_cpuid; + unsigned long s; +#ifdef CONFIG_SMP + unsigned long flags; + + ipipe_lock_cpu(flags); /* Care for migration. */ + + s = __test_and_set_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + + if (!__ipipe_pipeline_head_p(ipd)) + ipipe_unlock_cpu(flags); +#else /* CONFIG_SMP */ + s = test_and_set_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + + if (__ipipe_pipeline_head_p(ipd)) + local_irq_disable_hw(); +#endif /* CONFIG_SMP */ + + return s; +} + +/* + * ipipe_unstall_pipeline_from() -- Unstall the pipeline and + * synchronize pending interrupts for a given domain. See + * __ipipe_walk_pipeline() for more information. 
+ */ +void fastcall ipipe_unstall_pipeline_from(struct ipipe_domain *ipd) +{ + struct list_head *pos; + unsigned long flags; + ipipe_declare_cpuid; + + ipipe_lock_cpu(flags); + + __clear_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + + if (ipd == per_cpu(ipipe_percpu_domain, cpuid)) + pos = &ipd->p_link; + else + pos = __ipipe_pipeline.next; + + __ipipe_walk_pipeline(pos, cpuid); + + if (__ipipe_pipeline_head_p(ipd)) + local_irq_enable_hw(); + else + ipipe_unlock_cpu(flags); +} + +unsigned long fastcall ipipe_test_and_unstall_pipeline_from(struct ipipe_domain *ipd) +{ + unsigned long flags, x; + ipipe_declare_cpuid; + + ipipe_get_cpu(flags); + x = test_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpuid].status); + ipipe_unstall_pipeline_from(ipd); + ipipe_put_cpu(flags); + + return x; +} + +void fastcall ipipe_restore_pipeline_from(struct ipipe_domain *ipd, + unsigned long x) +{ + if (x) + ipipe_stall_pipeline_from(ipd); + else + ipipe_unstall_pipeline_from(ipd); +} + +void ipipe_unstall_pipeline_head(void) +{ + struct ipipe_domain *head; + unsigned long flags; + ipipe_declare_cpuid; + + ipipe_lock_cpu(flags); + head = __ipipe_pipeline_head(); + __clear_bit(IPIPE_STALL_FLAG, &head->cpudata[cpuid].status); + + if (unlikely(head->cpudata[cpuid].irq_pending_hi != 0)) { + if (likely(head == per_cpu(ipipe_percpu_domain, cpuid))) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + else + __ipipe_walk_pipeline(&head->p_link, cpuid); + } + + local_irq_enable_hw(); +} + +void fastcall __ipipe_restore_pipeline_head(struct ipipe_domain *head, unsigned long x) +{ + ipipe_declare_cpuid; + unsigned long flags; + + ipipe_lock_cpu(flags); + + if (x) { +#ifdef CONFIG_DEBUG_KERNEL + static int warned; + if (!warned && test_and_set_bit(IPIPE_STALL_FLAG, &head->cpudata[cpuid].status)) { + /* + * Already stalled albeit ipipe_restore_pipeline_head() + * should have detected it? 
Send a warning once. + */ + warned = 1; + printk(KERN_WARNING + "I-pipe: ipipe_restore_pipeline_head() optimization failed.\n"); + dump_stack(); + } +#else /* !CONFIG_DEBUG_KERNEL */ + set_bit(IPIPE_STALL_FLAG, &head->cpudata[cpuid].status); +#endif /* CONFIG_DEBUG_KERNEL */ + } + else { + __clear_bit(IPIPE_STALL_FLAG, &head->cpudata[cpuid].status); + if (unlikely(head->cpudata[cpuid].irq_pending_hi != 0)) { + if (likely(head == per_cpu(ipipe_percpu_domain, cpuid))) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + else + __ipipe_walk_pipeline(&head->p_link, cpuid); + } + local_irq_enable_hw(); + } +} + +/* __ipipe_walk_pipeline(): Plays interrupts pending in the log. Must + be called with local hw interrupts disabled. */ + +void fastcall __ipipe_walk_pipeline(struct list_head *pos, int cpuid) +{ + struct ipipe_domain *this_domain = per_cpu(ipipe_percpu_domain, cpuid); + + while (pos != &__ipipe_pipeline) { + struct ipipe_domain *next_domain = + list_entry(pos, struct ipipe_domain, p_link); + + if (test_bit + (IPIPE_STALL_FLAG, &next_domain->cpudata[cpuid].status)) + break; /* Stalled stage -- do not go further. */ + + if (next_domain->cpudata[cpuid].irq_pending_hi != 0) { + + if (next_domain == this_domain) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + else { + __ipipe_switch_to(this_domain, next_domain, + cpuid); + + ipipe_load_cpuid(); /* Processor might have changed. */ + + if (this_domain->cpudata[cpuid]. + irq_pending_hi != 0 + && !test_bit(IPIPE_STALL_FLAG, + &this_domain->cpudata[cpuid].status)) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + } + + break; + } else if (next_domain == this_domain) + break; + + pos = next_domain->p_link.next; + } +} + +/* + * ipipe_suspend_domain() -- Suspend the current domain, switching to + * the next one which has pending work down the pipeline. 
+ */ +void ipipe_suspend_domain(void) +{ + struct ipipe_domain *this_domain, *next_domain; + struct list_head *ln; + unsigned long flags; + ipipe_declare_cpuid; + + ipipe_lock_cpu(flags); + + this_domain = next_domain = per_cpu(ipipe_percpu_domain, cpuid); + + __clear_bit(IPIPE_STALL_FLAG, &this_domain->cpudata[cpuid].status); + + if (this_domain->cpudata[cpuid].irq_pending_hi != 0) + goto sync_stage; + + for (;;) { + ln = next_domain->p_link.next; + + if (ln == &__ipipe_pipeline) + break; + + next_domain = list_entry(ln, struct ipipe_domain, p_link); + + if (test_bit(IPIPE_STALL_FLAG, + &next_domain->cpudata[cpuid].status)) + break; + + if (next_domain->cpudata[cpuid].irq_pending_hi == 0) + continue; + + per_cpu(ipipe_percpu_domain, cpuid) = next_domain; + +sync_stage: + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + + ipipe_load_cpuid(); /* Processor might have changed. */ + + if (per_cpu(ipipe_percpu_domain, cpuid) != next_domain) + /* + * Something has changed the current domain under our + * feet, recycling the register set; take note. + */ + this_domain = per_cpu(ipipe_percpu_domain, cpuid); + } + + per_cpu(ipipe_percpu_domain, cpuid) = this_domain; + + ipipe_unlock_cpu(flags); +} + +/* ipipe_alloc_virq() -- Allocate a pipelined virtual/soft interrupt. + * Virtual interrupts are handled in exactly the same way as their + * hw-generated counterparts wrt pipelining. + */ +unsigned ipipe_alloc_virq(void) +{ + unsigned long flags, irq = 0; + int ipos; + + spin_lock_irqsave_hw(&__ipipe_pipelock, flags); + + if (__ipipe_virtual_irq_map != ~0) { + ipos = ffz(__ipipe_virtual_irq_map); + set_bit(ipos, &__ipipe_virtual_irq_map); + irq = ipos + IPIPE_VIRQ_BASE; + } + + spin_unlock_irqrestore_hw(&__ipipe_pipelock, flags); + + return irq; +} + +/* ipipe_virtualize_irq() -- Attach a handler (and optionally a hw + acknowledge routine) to an interrupt for a given domain. 
*/ + +int ipipe_virtualize_irq(struct ipipe_domain *ipd, + unsigned irq, + ipipe_irq_handler_t handler, + void *cookie, + ipipe_irq_ackfn_t acknowledge, + unsigned modemask) +{ + unsigned long flags; + int err; + + if (irq >= IPIPE_NR_IRQS) + return -EINVAL; + + if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK) + return -EPERM; + + if (!test_bit(IPIPE_AHEAD_FLAG, &ipd->flags)) + /* Silently unwire interrupts for non-heading domains. */ + modemask &= ~IPIPE_WIRED_MASK; + + spin_lock_irqsave_hw(&__ipipe_pipelock, flags); + + if (handler != NULL) { + + if (handler == IPIPE_SAME_HANDLER) { + handler = ipd->irqs[irq].handler; + cookie = ipd->irqs[irq].cookie; + + if (handler == NULL) { + err = -EINVAL; + goto unlock_and_exit; + } + } else if ((modemask & IPIPE_EXCLUSIVE_MASK) != 0 && + ipd->irqs[irq].handler != NULL) { + err = -EBUSY; + goto unlock_and_exit; + } + + if ((modemask & (IPIPE_SHARED_MASK | IPIPE_PASS_MASK)) == + IPIPE_SHARED_MASK) { + err = -EINVAL; + goto unlock_and_exit; + } + + /* Wired interrupts can only be delivered to domains + * always heading the pipeline. */ + + if ((modemask & IPIPE_WIRED_MASK) != 0) { + if ((modemask & (IPIPE_SHARED_MASK | IPIPE_PASS_MASK | IPIPE_STICKY_MASK)) != 0) { + err = -EINVAL; + goto unlock_and_exit; + } + modemask |= (IPIPE_HANDLE_MASK); + } + + if ((modemask & IPIPE_STICKY_MASK) != 0) + modemask |= IPIPE_HANDLE_MASK; + } else + modemask &= + ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK | + IPIPE_SHARED_MASK | IPIPE_EXCLUSIVE_MASK | IPIPE_WIRED_MASK); + + if (acknowledge == NULL) { + if ((modemask & IPIPE_SHARED_MASK) == 0) { + if (!ipipe_virtual_irq_p(irq)) { + /* Acknowledge handler unspecified for a hw + interrupt -- this is ok in non-shared + management mode, but we will force the use + of the Linux-defined handler instead. */ + acknowledge = ipipe_root_domain->irqs[irq].acknowledge; + } + } + else { + /* A valid acknowledge handler to be called in shared mode + is required when declaring a shared IRQ. 
*/ + err = -EINVAL; + goto unlock_and_exit; + } + } + + ipd->irqs[irq].handler = handler; + ipd->irqs[irq].cookie = cookie; + ipd->irqs[irq].acknowledge = acknowledge; + ipd->irqs[irq].control = modemask; + + if (irq < NR_IRQS && handler != NULL && !ipipe_virtual_irq_p(irq)) { + __ipipe_enable_irqdesc(irq); + + if ((modemask & IPIPE_ENABLE_MASK) != 0) { + if (ipd != ipipe_current_domain) { + /* IRQ enable/disable state is domain-sensitive, so we may + not change it for another domain. What is allowed + however is forcing some domain to handle an interrupt + source, by passing the proper 'ipd' descriptor which + thus may be different from ipipe_current_domain. */ + err = -EPERM; + goto unlock_and_exit; + } + + __ipipe_enable_irq(irq); + } + } + + err = 0; + + unlock_and_exit: + + spin_unlock_irqrestore_hw(&__ipipe_pipelock, flags); + + return err; +} + +/* ipipe_control_irq() -- Change modes of a pipelined interrupt for + * the current domain. */ + +int ipipe_control_irq(unsigned irq, unsigned clrmask, unsigned setmask) +{ + struct ipipe_domain *ipd; + unsigned long flags; + + if (irq >= IPIPE_NR_IRQS) + return -EINVAL; + + ipd = ipipe_current_domain; + + if (ipd->irqs[irq].control & IPIPE_SYSTEM_MASK) + return -EPERM; + + if (((setmask | clrmask) & IPIPE_SHARED_MASK) != 0) + return -EINVAL; + + if (ipd->irqs[irq].handler == NULL) + setmask &= ~(IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK); + + if ((setmask & IPIPE_STICKY_MASK) != 0) + setmask |= IPIPE_HANDLE_MASK; + + if ((clrmask & (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK)) != 0) /* If one goes, both go. 
*/ + clrmask |= (IPIPE_HANDLE_MASK | IPIPE_STICKY_MASK); + + spin_lock_irqsave_hw(&__ipipe_pipelock, flags); + + ipd->irqs[irq].control &= ~clrmask; + ipd->irqs[irq].control |= setmask; + + if ((setmask & IPIPE_ENABLE_MASK) != 0) + __ipipe_enable_irq(irq); + else if ((clrmask & IPIPE_ENABLE_MASK) != 0) + __ipipe_disable_irq(irq); + + spin_unlock_irqrestore_hw(&__ipipe_pipelock, flags); + + return 0; +} + +/* __ipipe_dispatch_event() -- Low-level event dispatcher. */ + +int fastcall __ipipe_dispatch_event (unsigned event, void *data) +{ + struct ipipe_domain *start_domain, *this_domain, *next_domain; + ipipe_event_handler_t evhand; + struct list_head *pos, *npos; + unsigned long flags; + ipipe_declare_cpuid; + int propagate = 1; + + ipipe_lock_cpu(flags); + + start_domain = this_domain = per_cpu(ipipe_percpu_domain, cpuid); + + list_for_each_safe(pos,npos,&__ipipe_pipeline) { + + /* + * Note: Domain migration may occur while running + * event or interrupt handlers, in which case the + * current register set is going to be recycled for a + * different domain than the initiating one. We do + * care for that, always tracking the current domain + * descriptor upon return from those handlers. + */ + next_domain = list_entry(pos,struct ipipe_domain,p_link); + + /* + * Keep a cached copy of the handler's address since + * ipipe_catch_event() may clear it under our feet. + */ + + evhand = next_domain->evhand[event]; + + if (evhand != NULL) { + per_cpu(ipipe_percpu_domain, cpuid) = next_domain; + next_domain->cpudata[cpuid].evsync |= (1LL << event); + ipipe_unlock_cpu(flags); + propagate = !evhand(event,start_domain,data); + ipipe_lock_cpu(flags); + next_domain->cpudata[cpuid].evsync &= ~(1LL << event); + if (per_cpu(ipipe_percpu_domain, cpuid) != next_domain) + this_domain = per_cpu(ipipe_percpu_domain, cpuid); + } + + if (next_domain != ipipe_root_domain && /* NEVER sync the root stage here. 
*/ + next_domain->cpudata[cpuid].irq_pending_hi != 0 && + !test_bit(IPIPE_STALL_FLAG,&next_domain->cpudata[cpuid].status)) { + per_cpu(ipipe_percpu_domain, cpuid) = next_domain; + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + ipipe_load_cpuid(); + if (per_cpu(ipipe_percpu_domain, cpuid) != next_domain) + this_domain = per_cpu(ipipe_percpu_domain, cpuid); + } + + per_cpu(ipipe_percpu_domain, cpuid) = this_domain; + + if (next_domain == this_domain || !propagate) + break; + } + + ipipe_unlock_cpu(flags); + + return !propagate; +} + +/* + * __ipipe_dispatch_wired -- Wired interrupt dispatcher. Wired + * interrupts are immediately and unconditionally delivered to the + * domain heading the pipeline upon receipt, and such domain must have + * been registered as an invariant head for the system (priority == + * IPIPE_HEAD_PRIORITY). The motivation for using wired interrupts is + * to get an extra-fast dispatching path for those IRQs, by relying on + * a straightforward logic based on assumptions that must always be + * true for invariant head domains. The following assumptions are + * made when dealing with such interrupts: + * + * 1- Wired interrupts are purely dynamic, i.e. the decision to + * propagate them down the pipeline must be done from the head domain + * ISR. + * 2- Wired interrupts cannot be shared or sticky. + * 3- The root domain cannot be an invariant pipeline head, in + * consequence of what the root domain cannot handle wired + * interrupts. + * 4- Wired interrupts must have a valid acknowledge handler for the + * head domain (if needed), and in any case, must not rely on handlers + * provided by lower priority domains during the acknowledge cycle + * (see __ipipe_handle_irq). + * + * Called with hw interrupts off. 
+ */ + +int fastcall __ipipe_dispatch_wired(struct ipipe_domain *head, unsigned irq) +{ + struct ipcpudata *cpudata; + struct ipipe_domain *old; + ipipe_declare_cpuid; + + ipipe_load_cpuid(); + cpudata = &head->cpudata[cpuid]; + cpudata->irq_counters[irq].total_hits++; + + if (test_bit(IPIPE_LOCK_FLAG, &head->irqs[irq].control)) { + /* If we can't process this IRQ right now, we must + * mark it as pending, so that it will get played + * during normal log sync when the corresponding + * interrupt source is eventually unlocked. */ + cpudata->irq_counters[irq].pending_hits++; + return 0; + } + + if (test_bit(IPIPE_STALL_FLAG, &cpudata->status)) { + cpudata->irq_counters[irq].pending_hits++; + __ipipe_set_irq_bit(head, cpuid, irq); + return 0; + } + + old = per_cpu(ipipe_percpu_domain, cpuid); + per_cpu(ipipe_percpu_domain, cpuid) = head; /* Switch to the head domain. */ + + __set_bit(IPIPE_STALL_FLAG, &cpudata->status); + head->irqs[irq].handler(irq,head->irqs[irq].cookie); /* Call the ISR. */ + __ipipe_run_irqtail(); + __clear_bit(IPIPE_STALL_FLAG, &cpudata->status); + + /* We expect the caller to start a complete pipeline walk upon + * return, so that propagated interrupts will get played. */ + + if (per_cpu(ipipe_percpu_domain, cpuid) == head) + per_cpu(ipipe_percpu_domain, cpuid) = old; /* Back to the preempted domain. */ + + return 1; +} + +/* + * __ipipe_sync_stage() -- Flush the pending IRQs for the current + * domain (and processor). This routine flushes the interrupt log + * (see "Optimistic interrupt protection" from D. Stodolsky et al. for + * more on the deferred interrupt scheme). Every interrupt that + * occurred while the pipeline was stalled gets played. WARNING: + * callers on SMP boxen should always check for CPU migration on + * return of this routine. One can control the kind of interrupts + * which are going to be sync'ed using the syncmask + * parameter. IPIPE_IRQMASK_ANY plays them all, IPIPE_IRQMASK_VIRT + * plays virtual interrupts only. 
+ * + * This routine must be called with hw interrupts off. + */ +void fastcall __ipipe_sync_stage(unsigned long syncmask) +{ + unsigned long mask, submask; + struct ipcpudata *cpudata; + struct ipipe_domain *ipd; + ipipe_declare_cpuid; + int level, rank; + unsigned irq; + + ipipe_load_cpuid(); + ipd = per_cpu(ipipe_percpu_domain, cpuid); + cpudata = &ipd->cpudata[cpuid]; + + if (__test_and_set_bit(IPIPE_SYNC_FLAG, &cpudata->status)) + return; + + /* + * The policy here is to keep the dispatching code interrupt-free + * by stalling the current stage. If the upper domain handler + * (which we call) wants to re-enable interrupts while in a safe + * portion of the code (e.g. SA_INTERRUPT flag unset for Linux's + * sigaction()), it will have to unstall (then stall again before + * returning to us!) the stage when it sees fit. + */ + while ((mask = (cpudata->irq_pending_hi & syncmask)) != 0) { + level = __ipipe_ffnz(mask); + + while ((submask = cpudata->irq_pending_lo[level]) != 0) { + rank = __ipipe_ffnz(submask); + irq = (level << IPIPE_IRQ_ISHIFT) + rank; + + if (test_bit(IPIPE_LOCK_FLAG, &ipd->irqs[irq].control)) { + __clear_bit(rank, &cpudata->irq_pending_lo[level]); + continue; + } + + if (--cpudata->irq_counters[irq].pending_hits == 0) { + __clear_bit(rank, &cpudata->irq_pending_lo[level]); + if (cpudata->irq_pending_lo[level] == 0) + __clear_bit(level, &cpudata->irq_pending_hi); + } + + __set_bit(IPIPE_STALL_FLAG, &cpudata->status); + __ipipe_run_isr(ipd, irq, cpuid); +#ifdef CONFIG_SMP + { + int _cpuid = ipipe_processor_id(); + + if (_cpuid != cpuid) { /* Handle CPU migration. */ + /* + * We expect any domain to clear the SYNC bit each + * time it switches in a new task, so that preemptions + * and/or CPU migrations (in the SMP case) over the + * ISR do not lock out the log syncer for some + * indefinite amount of time. In the Linux case, + * schedule() handles this (see kernel/sched.c). 
For + * this reason, we don't bother clearing it here for + * the source CPU in the migration handling case, + * since it must have scheduled another task in by + * now. + */ + cpuid = _cpuid; + cpudata = &ipd->cpudata[cpuid]; + __set_bit(IPIPE_SYNC_FLAG, &cpudata->status); + } + } +#endif /* CONFIG_SMP */ + + __clear_bit(IPIPE_STALL_FLAG, &cpudata->status); + } + } + + __clear_bit(IPIPE_SYNC_FLAG, &cpudata->status); +} + +#ifdef CONFIG_PROC_FS + +#include + +struct proc_dir_entry *ipipe_proc_root; + +static int __ipipe_version_info_proc(char *page, + char **start, + off_t off, int count, int *eof, void *data) +{ + int len = sprintf(page, "%s\n", IPIPE_VERSION_STRING); + + len -= off; + + if (len <= off + count) + *eof = 1; + + *start = page + off; + + if(len > count) + len = count; + + if(len < 0) + len = 0; + + return len; +} + +static int __ipipe_common_info_proc(char *page, + char **start, + off_t off, int count, int *eof, void *data) +{ + struct ipipe_domain *ipd = (struct ipipe_domain *)data; + unsigned long ctlbits; + unsigned irq, _irq; + char *p = page; + int len; + + spin_lock(&__ipipe_pipelock); + + if (test_bit(IPIPE_AHEAD_FLAG,&ipd->flags)) + p += sprintf(p, "Invariant head"); + else + p += sprintf(p, "Priority=%d", ipd->priority); + + p += sprintf(p, ", Id=0x%.8x\n", ipd->domid); + + irq = 0; + + while (irq < IPIPE_NR_IRQS) { + ctlbits = + (ipd->irqs[irq]. + control & (IPIPE_HANDLE_MASK | IPIPE_PASS_MASK | + IPIPE_STICKY_MASK | IPIPE_WIRED_MASK)); + if (irq >= IPIPE_NR_XIRQS && !ipipe_virtual_irq_p(irq)) { + /* + * There might be a hole between the last external + * IRQ and the first virtual one; skip it. + */ + irq++; + continue; + } + + if (ipipe_virtual_irq_p(irq) + && !test_bit(irq - IPIPE_VIRQ_BASE, + &__ipipe_virtual_irq_map)) { + /* Non-allocated virtual IRQ; skip it. */ + irq++; + continue; + } + + /* + * Attempt to group consecutive IRQ numbers having the + * same virtualization settings in a single line. 
+ */ + + _irq = irq; + + while (++_irq < IPIPE_NR_IRQS) { + if (ipipe_virtual_irq_p(_irq) != + ipipe_virtual_irq_p(irq) + || (ipipe_virtual_irq_p(_irq) + && !test_bit(_irq - IPIPE_VIRQ_BASE, + &__ipipe_virtual_irq_map)) + || ctlbits != (ipd->irqs[_irq]. + control & (IPIPE_HANDLE_MASK | + IPIPE_PASS_MASK | + IPIPE_STICKY_MASK))) + break; + } + + if (_irq == irq + 1) + p += sprintf(p, "irq%u: ", irq); + else + p += sprintf(p, "irq%u-%u: ", irq, _irq - 1); + + /* + * Statuses are as follows: + * o "accepted" means handled _and_ passed down the pipeline. + * o "grabbed" means handled, but the interrupt might be + * terminated _or_ passed down the pipeline depending on + * what the domain handler asks for to the I-pipe. + * o "wired" is basically the same as "grabbed", except that + * the interrupt is unconditionally delivered to an invariant + * pipeline head domain. + * o "passed" means unhandled by the domain but passed + * down the pipeline. + * o "discarded" means unhandled and _not_ passed down the + * pipeline. The interrupt merely disappears from the + * current domain down to the end of the pipeline. 
+ */ + if (ctlbits & IPIPE_HANDLE_MASK) { + if (ctlbits & IPIPE_PASS_MASK) + p += sprintf(p, "accepted"); + else if (ctlbits & IPIPE_WIRED_MASK) + p += sprintf(p, "wired"); + else + p += sprintf(p, "grabbed"); + } else if (ctlbits & IPIPE_PASS_MASK) + p += sprintf(p, "passed"); + else + p += sprintf(p, "discarded"); + + if (ctlbits & IPIPE_STICKY_MASK) + p += sprintf(p, ", sticky"); + + if (ipipe_virtual_irq_p(irq)) + p += sprintf(p, ", virtual"); + + p += sprintf(p, "\n"); + + irq = _irq; + } + + spin_unlock(&__ipipe_pipelock); + + len = p - page; + + if (len <= off + count) + *eof = 1; + + *start = page + off; + + len -= off; + + if (len > count) + len = count; + + if (len < 0) + len = 0; + + return len; +} + +void __ipipe_add_domain_proc(struct ipipe_domain *ipd) +{ + create_proc_read_entry(ipd->name,0444,ipipe_proc_root,&__ipipe_common_info_proc,ipd); +} + +void __ipipe_remove_domain_proc(struct ipipe_domain *ipd) +{ + remove_proc_entry(ipd->name,ipipe_proc_root); +} + +void ipipe_init_proc(void) +{ + ipipe_proc_root = create_proc_entry("ipipe",S_IFDIR, 0); + create_proc_read_entry("version",0444,ipipe_proc_root,&__ipipe_version_info_proc,NULL); + __ipipe_add_domain_proc(ipipe_root_domain); + + __ipipe_init_tracer(); +} + +#endif /* CONFIG_PROC_FS */ + +EXPORT_SYMBOL(ipipe_virtualize_irq); +EXPORT_SYMBOL(ipipe_control_irq); +EXPORT_SYMBOL(ipipe_suspend_domain); +EXPORT_SYMBOL(ipipe_alloc_virq); +EXPORT_PER_CPU_SYMBOL(ipipe_percpu_domain); +EXPORT_SYMBOL(ipipe_root); +EXPORT_SYMBOL(ipipe_stall_pipeline_from); +EXPORT_SYMBOL(ipipe_test_and_stall_pipeline_from); +EXPORT_SYMBOL(ipipe_unstall_pipeline_from); +EXPORT_SYMBOL(ipipe_restore_pipeline_from); +EXPORT_SYMBOL(ipipe_test_and_unstall_pipeline_from); +EXPORT_SYMBOL(ipipe_unstall_pipeline_head); +EXPORT_SYMBOL(__ipipe_restore_pipeline_head); +EXPORT_SYMBOL(__ipipe_unstall_root); +EXPORT_SYMBOL(__ipipe_stall_root); +EXPORT_SYMBOL(__ipipe_restore_root); +EXPORT_SYMBOL(__ipipe_test_and_stall_root); 
+EXPORT_SYMBOL(__ipipe_test_root); +EXPORT_SYMBOL(__ipipe_dispatch_event); +EXPORT_SYMBOL(__ipipe_dispatch_wired); +EXPORT_SYMBOL(__ipipe_sync_stage); +EXPORT_SYMBOL(__ipipe_pipeline); +EXPORT_SYMBOL(__ipipe_pipelock); +EXPORT_SYMBOL(__ipipe_virtual_irq_map); Index: linux-2.6.16.33/kernel/ipipe/generic.c =================================================================== --- /dev/null +++ linux-2.6.16.33/kernel/ipipe/generic.c @@ -0,0 +1,424 @@ +/* -*- linux-c -*- + * linux/kernel/ipipe/generic.c + * + * Copyright (C) 2002-2005 Philippe Gerum. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139, + * USA; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Architecture-independent I-PIPE services. + */ + +#include +#include +#include +#include +#include +#ifdef CONFIG_PROC_FS +#include +#endif /* CONFIG_PROC_FS */ + +MODULE_DESCRIPTION("I-pipe"); +MODULE_LICENSE("GPL"); + +static int __ipipe_ptd_key_count; + +static unsigned long __ipipe_ptd_key_map; + +/* ipipe_register_domain() -- Link a new domain to the pipeline. 
*/ + +int ipipe_register_domain(struct ipipe_domain *ipd, + struct ipipe_domain_attr *attr) +{ + struct list_head *pos; + unsigned long flags; + + if (ipipe_current_domain != ipipe_root_domain) { + printk(KERN_WARNING + "I-pipe: Only the root domain may register a new domain.\n"); + return -EPERM; + } + + if (attr->priority == IPIPE_HEAD_PRIORITY && + test_bit(IPIPE_AHEAD_FLAG,&__ipipe_pipeline_head()->flags)) + return -EAGAIN; /* Cannot override current head. */ + + flags = ipipe_critical_enter(NULL); + + list_for_each(pos, &__ipipe_pipeline) { + struct ipipe_domain *_ipd = + list_entry(pos, struct ipipe_domain, p_link); + if (_ipd->domid == attr->domid) + break; + } + + ipipe_critical_exit(flags); + + if (pos != &__ipipe_pipeline) + /* A domain with the given id already exists -- fail. */ + return -EBUSY; + + ipd->name = attr->name; + ipd->domid = attr->domid; + ipd->pdd = attr->pdd; + ipd->flags = 0; + + if (attr->priority == IPIPE_HEAD_PRIORITY) { + ipd->priority = INT_MAX; + __set_bit(IPIPE_AHEAD_FLAG,&ipd->flags); + } + else + ipd->priority = attr->priority; + + __ipipe_init_stage(ipd); + + INIT_LIST_HEAD(&ipd->p_link); + +#ifdef CONFIG_PROC_FS + __ipipe_add_domain_proc(ipd); +#endif /* CONFIG_PROC_FS */ + + flags = ipipe_critical_enter(NULL); + + list_for_each(pos, &__ipipe_pipeline) { + struct ipipe_domain *_ipd = + list_entry(pos, struct ipipe_domain, p_link); + if (ipd->priority > _ipd->priority) + break; + } + + list_add_tail(&ipd->p_link, pos); + + ipipe_critical_exit(flags); + + printk(KERN_WARNING "I-pipe: Domain %s registered.\n", ipd->name); + + /* + * Finally, allow the new domain to perform its initialization + * chores. + */ + + if (attr->entry != NULL) { + ipipe_declare_cpuid; + + ipipe_lock_cpu(flags); + + per_cpu(ipipe_percpu_domain, cpuid) = ipd; + attr->entry(); + per_cpu(ipipe_percpu_domain, cpuid) = ipipe_root_domain; + + ipipe_load_cpuid(); /* Processor might have changed. 
*/ + + if (ipipe_root_domain->cpudata[cpuid].irq_pending_hi != 0 && + !test_bit(IPIPE_STALL_FLAG, + &ipipe_root_domain->cpudata[cpuid].status)) + __ipipe_sync_pipeline(IPIPE_IRQMASK_ANY); + + ipipe_unlock_cpu(flags); + } + + return 0; +} + +/* ipipe_unregister_domain() -- Remove a domain from the pipeline. */ + +int ipipe_unregister_domain(struct ipipe_domain *ipd) +{ + unsigned long flags; + + if (ipipe_current_domain != ipipe_root_domain) { + printk(KERN_WARNING + "I-pipe: Only the root domain may unregister a domain.\n"); + return -EPERM; + } + + if (ipd == ipipe_root_domain) { + printk(KERN_WARNING + "I-pipe: Cannot unregister the root domain.\n"); + return -EPERM; + } +#ifdef CONFIG_SMP + { + int nr_cpus = num_online_cpus(), _cpuid; + unsigned irq; + + /* + * In the SMP case, wait for the logged events to drain on + * other processors before eventually removing the domain + * from the pipeline. + */ + + ipipe_unstall_pipeline_from(ipd); + + flags = ipipe_critical_enter(NULL); + + for (irq = 0; irq < IPIPE_NR_IRQS; irq++) { + clear_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control); + clear_bit(IPIPE_STICKY_FLAG, &ipd->irqs[irq].control); + set_bit(IPIPE_PASS_FLAG, &ipd->irqs[irq].control); + } + + ipipe_critical_exit(flags); + + for (_cpuid = 0; _cpuid < nr_cpus; _cpuid++) + for (irq = 0; irq < IPIPE_NR_IRQS; irq++) + while (ipd->cpudata[_cpuid].irq_counters[irq].pending_hits > 0) + cpu_relax(); + } +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_PROC_FS + __ipipe_remove_domain_proc(ipd); +#endif /* CONFIG_PROC_FS */ + + /* + * Simply remove the domain from the pipeline and we are almost done. + */ + + flags = ipipe_critical_enter(NULL); + list_del_init(&ipd->p_link); + ipipe_critical_exit(flags); + + __ipipe_cleanup_domain(ipd); + + printk(KERN_WARNING "I-pipe: Domain %s unregistered.\n", ipd->name); + + return 0; +} + +/* + * ipipe_propagate_irq() -- Force a given IRQ propagation on behalf of + * a running interrupt handler to the next domain down the pipeline. 
+ * ipipe_schedule_irq() -- Does almost the same as above, but attempts + * to pend the interrupt for the current domain first. + */ +int fastcall __ipipe_schedule_irq(unsigned irq, struct list_head *head) +{ + struct list_head *ln; + unsigned long flags; + ipipe_declare_cpuid; + + if (irq >= IPIPE_NR_IRQS || + (ipipe_virtual_irq_p(irq) + && !test_bit(irq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map))) + return -EINVAL; + + ipipe_lock_cpu(flags); + + ln = head; + + while (ln != &__ipipe_pipeline) { + struct ipipe_domain *ipd = + list_entry(ln, struct ipipe_domain, p_link); + + if (test_bit(IPIPE_HANDLE_FLAG, &ipd->irqs[irq].control)) { + ipd->cpudata[cpuid].irq_counters[irq].total_hits++; + ipd->cpudata[cpuid].irq_counters[irq].pending_hits++; + __ipipe_set_irq_bit(ipd, cpuid, irq); + ipipe_unlock_cpu(flags); + return 1; + } + + ln = ipd->p_link.next; + } + + ipipe_unlock_cpu(flags); + + return 0; +} + +/* ipipe_free_virq() -- Release a virtual/soft interrupt. */ + +int ipipe_free_virq(unsigned virq) +{ + if (!ipipe_virtual_irq_p(virq)) + return -EINVAL; + + clear_bit(virq - IPIPE_VIRQ_BASE, &__ipipe_virtual_irq_map); + + return 0; +} + +void ipipe_init_attr(struct ipipe_domain_attr *attr) +{ + attr->name = "anon"; + attr->domid = 1; + attr->entry = NULL; + attr->priority = IPIPE_ROOT_PRIO; + attr->pdd = NULL; +} + +/* + * ipipe_catch_event() -- Interpose or remove an event handler for a + * given domain. 
+ */ +ipipe_event_handler_t ipipe_catch_event(struct ipipe_domain *ipd, + unsigned event, + ipipe_event_handler_t handler) +{ + ipipe_event_handler_t old_handler; + unsigned long flags; + int self = 0, cpuid; + + if (event & IPIPE_EVENT_SELF) { + event &= ~IPIPE_EVENT_SELF; + self = 1; + } + + if (event >= IPIPE_NR_EVENTS) + return NULL; + + flags = ipipe_critical_enter(NULL); + + if (!(old_handler = xchg(&ipd->evhand[event],handler))) { + if (handler) { + if (self) + ipd->evself |= (1LL << event); + else + __ipipe_event_monitors[event]++; + } + } + else if (!handler) { + if (ipd->evself & (1LL << event)) + ipd->evself &= ~(1LL << event); + else + __ipipe_event_monitors[event]--; + } else if ((ipd->evself & (1LL << event)) && !self) { + __ipipe_event_monitors[event]++; + ipd->evself &= ~(1LL << event); + } else if (!(ipd->evself & (1LL << event)) && self) { + __ipipe_event_monitors[event]--; + ipd->evself |= (1LL << event); + } + + ipipe_critical_exit(flags); + + if (!handler && ipipe_root_domain_p) { + /* + * If we cleared a handler on behalf of the root + * domain, we have to wait for any current invocation + * to drain, since our caller might subsequently unmap + * the target domain. To this aim, this code + * synchronizes with __ipipe_dispatch_event(), + * guaranteeing that either the dispatcher sees a null + * handler in which case it discards the invocation + * (which also prevents from entering a livelock), or + * finds a valid handler and calls it. Symmetrically, + * ipipe_catch_event() ensures that the called code + * won't be unmapped under our feet until the event + * synchronization flag is cleared for the given event + * on all CPUs. 
+ */ + + for_each_online_cpu(cpuid) { + while (ipd->cpudata[cpuid].evsync & (1LL << event)) + schedule_timeout_interruptible(HZ / 50); + } + } + + return old_handler; +} + +cpumask_t ipipe_set_irq_affinity (unsigned irq, cpumask_t cpumask) +{ +#ifdef CONFIG_SMP + if (irq >= IPIPE_NR_XIRQS) + /* Allow changing affinity of external IRQs only. */ + return CPU_MASK_NONE; + + if (num_online_cpus() > 1) + return __ipipe_set_irq_affinity(irq,cpumask); +#endif /* CONFIG_SMP */ + + return CPU_MASK_NONE; +} + +int fastcall ipipe_send_ipi (unsigned ipi, cpumask_t cpumask) + +{ +#ifdef CONFIG_SMP + return __ipipe_send_ipi(ipi,cpumask); +#else /* !CONFIG_SMP */ + return -EINVAL; +#endif /* CONFIG_SMP */ +} + +int ipipe_alloc_ptdkey (void) +{ + unsigned long flags; + int key = -1; + + spin_lock_irqsave_hw(&__ipipe_pipelock,flags); + + if (__ipipe_ptd_key_count < IPIPE_ROOT_NPTDKEYS) { + key = ffz(__ipipe_ptd_key_map); + set_bit(key,&__ipipe_ptd_key_map); + __ipipe_ptd_key_count++; + } + + spin_unlock_irqrestore_hw(&__ipipe_pipelock,flags); + + return key; +} + +int ipipe_free_ptdkey (int key) +{ + unsigned long flags; + + if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) + return -EINVAL; + + spin_lock_irqsave_hw(&__ipipe_pipelock,flags); + + if (test_and_clear_bit(key,&__ipipe_ptd_key_map)) + __ipipe_ptd_key_count--; + + spin_unlock_irqrestore_hw(&__ipipe_pipelock,flags); + + return 0; +} + +int fastcall ipipe_set_ptd (int key, void *value) + +{ + if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) + return -EINVAL; + + current->ptd[key] = value; + + return 0; +} + +void fastcall *ipipe_get_ptd (int key) + +{ + if (key < 0 || key >= IPIPE_ROOT_NPTDKEYS) + return NULL; + + return current->ptd[key]; +} + +EXPORT_SYMBOL(ipipe_register_domain); +EXPORT_SYMBOL(ipipe_unregister_domain); +EXPORT_SYMBOL(ipipe_free_virq); +EXPORT_SYMBOL(ipipe_init_attr); +EXPORT_SYMBOL(ipipe_catch_event); +EXPORT_SYMBOL(ipipe_alloc_ptdkey); +EXPORT_SYMBOL(ipipe_free_ptdkey); +EXPORT_SYMBOL(ipipe_set_ptd); 
+EXPORT_SYMBOL(ipipe_get_ptd);
+EXPORT_SYMBOL(ipipe_set_irq_affinity);
+EXPORT_SYMBOL(ipipe_send_ipi);
+EXPORT_SYMBOL(__ipipe_schedule_irq);
Index: linux-2.6.16.33/kernel/ipipe/tracer.c
===================================================================
--- /dev/null
+++ linux-2.6.16.33/kernel/ipipe/tracer.c
@@ -0,0 +1,1243 @@
+/* -*- linux-c -*-
+ * kernel/ipipe/tracer.c
+ *
+ * Copyright (C) 2005 Luotao Fu.
+ *               2005, 2006 Jan Kiszka.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * I-pipe tracer: records trace points into per-CPU path buffers, keeps
+ * the longest ("max") and a frozen path per CPU, and dumps them via
+ * seq_file handlers or a panic-time printk dump.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define IPIPE_TRACE_PATHS 4 /* Do not lower below 3 */
+#define IPIPE_DEFAULT_ACTIVE 0 /* initial index of the active path */
+#define IPIPE_DEFAULT_MAX 1 /* initial index of the max path */
+#define IPIPE_DEFAULT_FROZEN 2 /* initial index of the frozen path */
+
+#define IPIPE_TRACE_POINTS (1 << CONFIG_IPIPE_TRACE_SHIFT)
+#define WRAP_POINT_NO(point) ((point) & (IPIPE_TRACE_POINTS-1)) /* ring index */
+
+#define IPIPE_DEFAULT_PRE_TRACE 10
+#define IPIPE_DEFAULT_POST_TRACE 10
+#define IPIPE_DEFAULT_BACK_TRACE 30
+
+#define IPIPE_DELAY_NOTE 1000 /* in nanoseconds */
+#define IPIPE_DELAY_WARN 10000 /* in nanoseconds */
+
+#define IPIPE_TFLG_NMI_LOCK 0x0001 /* path is being updated on this cpu */
+#define IPIPE_TFLG_NMI_HIT 0x0002 /* a trace request hit a locked path */
+#define IPIPE_TFLG_NMI_FREEZE_REQ 0x0004 /* freeze deferred until unlock */
+
+#define IPIPE_TFLG_HWIRQ_OFF 0x0100
+#define IPIPE_TFLG_FREEZING 0x0200
+#define IPIPE_TFLG_CURRDOM_SHIFT 10 /* bits 10..11: current domain */
+#define IPIPE_TFLG_CURRDOM_MASK 0x0C00
+#define IPIPE_TFLG_DOMSTATE_SHIFT 12 /* bits 12..15: domain stalled? */
+#define IPIPE_TFLG_DOMSTATE_BITS 3
+
+#define IPIPE_TFLG_DOMAIN_STALLED(point, n) \
+	(point->flags & (1 << (n + IPIPE_TFLG_DOMSTATE_SHIFT)))
+#define IPIPE_TFLG_CURRENT_DOMAIN(point) \
+	((point->flags & IPIPE_TFLG_CURRDOM_MASK) >> IPIPE_TFLG_CURRDOM_SHIFT)
+
+/* One recorded trace event. */
+
+struct ipipe_trace_point{
+	short type; /* ipipe_trace_type in the low IPIPE_TYPE_BITS, id/prio above */
+	short flags; /* IPIPE_TFLG_* point flags */
+	unsigned long eip; /* 0 marks an invalid (unused) point */
+	unsigned long parent_eip;
+	unsigned long v; /* user value, or pid for IPIPE_TRACE_PID */
+	unsigned long long timestamp; /* filled by ipipe_read_tsc() */
+};
+/*
+ * One trace path: a ring buffer of trace points plus its bookkeeping.
+ */
+struct ipipe_trace_path{
+	volatile int flags; /* IPIPE_TFLG_NMI_* lock bits and IPIPE_TFLG_FREEZING */
+	int dump_lock; /* separated from flags due to cross-cpu access */
+	int trace_pos; /* next point to fill */
+	int begin, end; /* finalised path begin and end */
+	int post_trace; /* non-zero when in post-trace phase */
+	unsigned long long length; /* max path length in cycles */
+	unsigned long nmi_saved_eip; /* for deferred requests from NMIs */
+	unsigned long nmi_saved_parent_eip;
+	unsigned long nmi_saved_v;
+	struct ipipe_trace_point point[IPIPE_TRACE_POINTS];
+} ____cacheline_aligned_in_smp;
+/*
+ * Trace point kinds, stored in the low IPIPE_TYPE_BITS of
+ * ipipe_trace_point.type.
+ */
+enum ipipe_trace_type
+{
+	IPIPE_TRACE_FUNC = 0,
+	IPIPE_TRACE_BEGIN,
+	IPIPE_TRACE_END,
+	IPIPE_TRACE_FREEZE,
+	IPIPE_TRACE_SPECIAL,
+	IPIPE_TRACE_PID,
+};
+
+#define IPIPE_TYPE_MASK 0x0007
+#define IPIPE_TYPE_BITS 3
+
+/* Per-CPU path storage; only the static variant is initialised here. */
+
+#ifdef CONFIG_IPIPE_TRACE_VMALLOC
+#define IPIPE_DEFAULT_TRACE_STATE 0
+
+static struct ipipe_trace_path *trace_paths[NR_CPUS];
+
+#else /* !CONFIG_IPIPE_TRACE_VMALLOC */
+#define IPIPE_DEFAULT_TRACE_STATE CONFIG_IPIPE_TRACE_ENABLE_VALUE
+
+static struct ipipe_trace_path trace_paths[NR_CPUS][IPIPE_TRACE_PATHS] =
+	{ [0 ... NR_CPUS-1] =
+		{ [0 ... IPIPE_TRACE_PATHS-1] =
+			{ .begin = -1, .end = -1 }
+		}
+	};
+#endif /* CONFIG_IPIPE_TRACE_VMALLOC */
+
+int ipipe_trace_enable = IPIPE_DEFAULT_TRACE_STATE;
+/* Per-CPU indices into trace_paths[cpu][] for each path role. */
+static int active_path[NR_CPUS] =
+	{ [0 ... NR_CPUS-1] = IPIPE_DEFAULT_ACTIVE };
+static int max_path[NR_CPUS] =
+	{ [0 ... NR_CPUS-1] = IPIPE_DEFAULT_MAX };
+static int frozen_path[NR_CPUS] =
+	{ [0 ... NR_CPUS-1] = IPIPE_DEFAULT_FROZEN };
+static ipipe_spinlock_t global_path_lock = IPIPE_SPIN_LOCK_UNLOCKED;
+static int pre_trace = IPIPE_DEFAULT_PRE_TRACE;
+static int post_trace = IPIPE_DEFAULT_POST_TRACE;
+static int back_trace = IPIPE_DEFAULT_BACK_TRACE;
+static int verbose_trace = 0;
+/* State of the dump output code; out_mutex serialises the /proc handlers. */
+static DECLARE_MUTEX(out_mutex);
+static struct ipipe_trace_path *print_path; /* path currently being dumped */
+static struct ipipe_trace_path *panic_path; /* set by ipipe_trace_panic_freeze() */
+static int print_pre_trace;
+static int print_post_trace;
+
+
+static long __ipipe_signed_tsc2us(long long tsc);
+static void
+__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point);
+static void __ipipe_print_symname(struct seq_file *m, unsigned long eip);
+
+/*
+ * Encode into point->flags, for up to IPIPE_TFLG_DOMSTATE_BITS+1
+ * domains walked from the tail of the pipeline, which of them are
+ * stalled on cpu_id and which one is cpu_id's current domain.
+ */
+static notrace void
+__ipipe_store_domain_states(struct ipipe_trace_point *point, int cpu_id)
+{
+	struct list_head *pos;
+	int i = 0;
+
+	list_for_each_prev(pos, &__ipipe_pipeline) {
+		struct ipipe_domain *ipd =
+			list_entry(pos, struct ipipe_domain, p_link);
+
+		if (test_bit(IPIPE_STALL_FLAG, &ipd->cpudata[cpu_id].status))
+			point->flags |= 1 << (i + IPIPE_TFLG_DOMSTATE_SHIFT);
+
+		if (ipd == per_cpu(ipipe_percpu_domain, cpu_id))
+			point->flags |= i << IPIPE_TFLG_CURRDOM_SHIFT;
+
+		if (++i > IPIPE_TFLG_DOMSTATE_BITS)
+			break;
+	}
+}
+/*
+ * Return the index of the next path of cpu_id after 'old' (wrapping
+ * around) that is neither the max path, nor the frozen path, nor
+ * locked for dumping.
+ */
+static notrace int __ipipe_get_free_trace_path(int old, int cpu_id)
+{
+	int new_active = old;
+	struct ipipe_trace_path *tp;
+
+	do {
+		if (++new_active == IPIPE_TRACE_PATHS)
+			new_active = 0;
+		tp = &trace_paths[cpu_id][new_active];
+	} while ((new_active == max_path[cpu_id]) ||
+	         (new_active == frozen_path[cpu_id]) ||
+	         tp->dump_lock);
+
+	return new_active;
+}
+/*
+ * Seed new_tp with the pre_trace+1 points that precede old_pos in
+ * old_tp, so that a path started in new_tp already carries its
+ * pre-trace history.
+ */
+static notrace void
+__ipipe_migrate_pre_trace(struct ipipe_trace_path *new_tp,
+                          struct ipipe_trace_path *old_tp, int old_pos)
+{
+	int i;
+
+	new_tp->trace_pos = pre_trace+1;
+
+	for (i = new_tp->trace_pos; i > 0; i--)
+		memcpy(&new_tp->point[WRAP_POINT_NO(new_tp->trace_pos-i)],
+		       &old_tp->point[WRAP_POINT_NO(old_pos-i)],
+		       sizeof(struct ipipe_trace_point));
+
+	/* mark the end (i.e. the point before point[0]) invalid */
+	new_tp->point[IPIPE_TRACE_POINTS-1].eip = 0;
+}
+/*
+ * Finish a begin/end path. If it is longer than the cpu's current max
+ * path, tp becomes the new max path and tracing continues in a free
+ * path; otherwise tp is simply reused. Returns the path to trace into.
+ */
+static notrace struct ipipe_trace_path *
+__ipipe_trace_end(int cpu_id, struct ipipe_trace_path *tp, int pos)
+{
+	struct ipipe_trace_path *old_tp = tp;
+	long active = active_path[cpu_id];
+	unsigned long long length;
+
+	/* do we have a new worst case? */
+	length = tp->point[tp->end].timestamp -
+	         tp->point[tp->begin].timestamp;
+	if (length > (trace_paths[cpu_id][max_path[cpu_id]]).length) {
+		/* we need protection here against other cpus trying
+		   to start a proc dump */
+		spin_lock_hw(&global_path_lock);
+
+		/* active path holds new worst case */
+		tp->length = length;
+		max_path[cpu_id] = active;
+
+		/* find next unused trace path */
+		active = __ipipe_get_free_trace_path(active, cpu_id);
+
+		spin_unlock_hw(&global_path_lock);
+
+		tp = &trace_paths[cpu_id][active];
+
+		/* migrate last entries for pre-tracing */
+		__ipipe_migrate_pre_trace(tp, old_tp, pos);
+	}
+
+	return tp;
+}
+/*
+ * Turn tp into the frozen path of cpu_id and continue tracing in a
+ * free path, which is returned.
+ */
+static notrace struct ipipe_trace_path *
+__ipipe_trace_freeze(int cpu_id, struct ipipe_trace_path *tp, int pos)
+{
+	struct ipipe_trace_path *old_tp = tp;
+	long active = active_path[cpu_id];
+	int i;
+
+	/* frozen paths have no core (begin=end) */
+	tp->begin = tp->end;
+
+	/* we need protection here against other cpus trying
+	 * to set their frozen path or to start a proc dump */
+	spin_lock_hw(&global_path_lock);
+
+	frozen_path[cpu_id] = active;
+
+	/* find next unused trace path */
+	active = __ipipe_get_free_trace_path(active, cpu_id);
+
+	/* check if this is the first frozen path; if another cpu already
+	 * holds one, invalidate the end marker of ours */
+	for_each_online_cpu(i) {
+		if ((i != cpu_id) &&
+		    (trace_paths[i][frozen_path[i]].end >= 0))
+			tp->end = -1;
+	}
+
+	spin_unlock_hw(&global_path_lock);
+
+	tp = &trace_paths[cpu_id][active];
+
+	/* migrate last entries for pre-tracing */
+	__ipipe_migrate_pre_trace(tp, old_tp, pos);
+
+	return tp;
+}
+/*
+ * Core trace routine: record one point in the active path of the
+ * current cpu and run the begin/end/freeze state machine. Runs with
+ * hardware interrupts disabled; IPIPE_TFLG_NMI_LOCK in tp->flags
+ * guards against re-entry on the same cpu (e.g. from NMI context).
+ */
+void notrace
+__ipipe_trace(enum ipipe_trace_type type, unsigned long eip,
+              unsigned long parent_eip, unsigned long v)
+{
+	struct ipipe_trace_path *tp, *old_tp;
+	int pos, next_pos, begin;
+	struct ipipe_trace_point *point;
+	unsigned long flags;
+	int cpu_id;
+
+	local_irq_save_hw_notrace(flags);
+
+	cpu_id = ipipe_processor_id();
+restart:
+	tp = old_tp = &trace_paths[cpu_id][active_path[cpu_id]];
+
+	/* here starts a race window with NMIs - caught below */
+
+	/* check for NMI recursion */
+	if (unlikely(tp->flags & IPIPE_TFLG_NMI_LOCK)) {
+		tp->flags |= IPIPE_TFLG_NMI_HIT;
+
+		/* first freeze request from NMI context? */
+		if ((type == IPIPE_TRACE_FREEZE) &&
+		    !(tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)) {
+			/* save arguments and mark deferred freezing */
+			tp->flags |= IPIPE_TFLG_NMI_FREEZE_REQ;
+			tp->nmi_saved_eip = eip;
+			tp->nmi_saved_parent_eip = parent_eip;
+			tp->nmi_saved_v = v;
+		}
+		return; /* no need for restoring flags inside IRQ */
+	}
+
+	/* clear NMI events and set lock (atomically per cpu) */
+	tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT |
+	                           IPIPE_TFLG_NMI_FREEZE_REQ))
+	                       | IPIPE_TFLG_NMI_LOCK;
+
+	/* check active_path again - some nasty NMI may have switched
+	 * it meanwhile */
+	if (unlikely(tp != &trace_paths[cpu_id][active_path[cpu_id]])) {
+		/* release lock on wrong path and restart */
+		tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+		/* there is no chance that the NMI got deferred
+		 * => no need to check for pending freeze requests */
+		goto restart;
+	}
+
+	/* get the point buffer */
+	pos = tp->trace_pos;
+	point = &tp->point[pos];
+
+	/* store all trace point data */
+	point->type = type;
+	point->flags = local_test_iflag_hw(flags) ? 0 : IPIPE_TFLG_HWIRQ_OFF;
+	point->eip = eip;
+	point->parent_eip = parent_eip;
+	point->v = v;
+	ipipe_read_tsc(point->timestamp);
+
+	__ipipe_store_domain_states(point, cpu_id);
+
+	/* forward to next point buffer */
+	next_pos = WRAP_POINT_NO(pos+1);
+	tp->trace_pos = next_pos;
+
+	/* only mark beginning if we haven't started yet */
+	begin = tp->begin;
+	if (unlikely(type == IPIPE_TRACE_BEGIN) && (begin < 0))
+		tp->begin = pos;
+
+	/* end of critical path, start post-trace if not already started */
+	if (unlikely(type == IPIPE_TRACE_END) &&
+	    (begin >= 0) && !tp->post_trace)
+		tp->post_trace = post_trace + 1;
+
+	/* freeze only if the slot is free and we are not already freezing */
+	if (unlikely(type == IPIPE_TRACE_FREEZE) &&
+	    (trace_paths[cpu_id][frozen_path[cpu_id]].begin < 0) &&
+	    !(tp->flags & IPIPE_TFLG_FREEZING)) {
+		tp->post_trace = post_trace + 1;
+		tp->flags |= IPIPE_TFLG_FREEZING;
+	}
+
+	/* enforce end of trace in case of overflow, i.e. before the ring
+	 * buffer wraps around onto the recorded begin */
+	if (unlikely(WRAP_POINT_NO(next_pos + 1) == begin)) {
+		tp->end = pos;
+		goto enforce_end;
+	}
+
+	/* stop tracing this path if we are in post-trace and
+	 *  a) that phase is over now or
+	 *  b) a new TRACE_BEGIN came in but we are not freezing this path */
+	if (unlikely((tp->post_trace > 0) && ((--tp->post_trace == 0) ||
+	             ((type == IPIPE_TRACE_BEGIN) &&
+	              !(tp->flags & IPIPE_TFLG_FREEZING))))) {
+		/* store the path's end (i.e. excluding post-trace) */
+		tp->end = WRAP_POINT_NO(pos - post_trace + tp->post_trace);
+
+enforce_end:
+		if (tp->flags & IPIPE_TFLG_FREEZING)
+			tp = __ipipe_trace_freeze(cpu_id, tp, pos);
+		else
+			tp = __ipipe_trace_end(cpu_id, tp, pos);
+
+		/* reset the active path, maybe already start a new one */
+		tp->begin = (type == IPIPE_TRACE_BEGIN) ?
+			WRAP_POINT_NO(tp->trace_pos - 1) : -1;
+		tp->end = -1;
+		tp->post_trace = 0;
+		tp->flags = 0;
+
+		/* update active_path not earlier to avoid races with NMIs */
+		active_path[cpu_id] = tp - trace_paths[cpu_id];
+	}
+
+	/* we still have old_tp and point,
+	 * let's reset NMI lock and check for catches */
+	old_tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+	if (unlikely(old_tp->flags & IPIPE_TFLG_NMI_HIT)) {
+		/* well, this late tagging may not immediately be visible for
+		 * other cpus already dumping this path - a minor issue */
+		point->flags |= IPIPE_TFLG_NMI_HIT;
+
+		/* handle deferred freezing from NMI context */
+		if (old_tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
+			__ipipe_trace(IPIPE_TRACE_FREEZE, old_tp->nmi_saved_eip,
+			              old_tp->nmi_saved_parent_eip,
+			              old_tp->nmi_saved_v);
+	}
+
+	local_irq_restore_hw_notrace(flags);
+}
+/*
+ * Take global_path_lock with hardware interrupts off and set the NMI
+ * lock on this cpu's active path. Returns the saved interrupt flags,
+ * to be handed to __ipipe_global_path_unlock().
+ */
+static unsigned long __ipipe_global_path_lock(void)
+{
+	unsigned long flags;
+	int cpu_id;
+	struct ipipe_trace_path *tp;
+
+	spin_lock_irqsave_hw(&global_path_lock, flags);
+
+	cpu_id = ipipe_processor_id();
+restart:
+	tp = &trace_paths[cpu_id][active_path[cpu_id]];
+
+	/* here is small race window with NMIs - caught below */
+
+	/* clear NMI events and set lock (atomically per cpu) */
+	tp->flags = (tp->flags & ~(IPIPE_TFLG_NMI_HIT |
+	                           IPIPE_TFLG_NMI_FREEZE_REQ))
+	                       | IPIPE_TFLG_NMI_LOCK;
+
+	/* check active_path again - some nasty NMI may have switched
+	 * it meanwhile */
+	if (tp != &trace_paths[cpu_id][active_path[cpu_id]]) {
+		/* release lock on wrong path and restart */
+		tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+		/* there is no chance that the NMI got deferred
+		 * => no need to check for pending freeze requests */
+		goto restart;
+	}
+
+	return flags;
+}
+/*
+ * Counterpart of __ipipe_global_path_lock(): drop the spinlock, clear
+ * the NMI lock, replay a freeze request deferred meanwhile and restore
+ * the interrupt state.
+ */
+static void __ipipe_global_path_unlock(unsigned long flags)
+{
+	int cpu_id;
+	struct ipipe_trace_path *tp;
+
+	/* release spinlock first - it's not involved in the NMI issue */
+	spin_unlock_hw(&global_path_lock);
+
+	cpu_id = ipipe_processor_id();
+	tp = &trace_paths[cpu_id][active_path[cpu_id]];
+
+	tp->flags &= ~IPIPE_TFLG_NMI_LOCK;
+
+	/* handle deferred freezing from NMI context */
+	if (tp->flags & IPIPE_TFLG_NMI_FREEZE_REQ)
+		__ipipe_trace(IPIPE_TRACE_FREEZE, tp->nmi_saved_eip,
+		              tp->nmi_saved_parent_eip, tp->nmi_saved_v);
+
+	local_irq_restore_hw(flags);
+}
+/*
+ * Exported trace entry points. Each one is a no-op while
+ * ipipe_trace_enable is zero and otherwise records a point of the
+ * corresponding type together with the caller's return addresses.
+ */
+void notrace ipipe_trace_begin(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_BEGIN, __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_begin);
+
+void notrace ipipe_trace_end(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_END, __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_end);
+
+void notrace ipipe_trace_freeze(unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_FREEZE, __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_freeze);
+/* The user-supplied id is stored in the type bits above IPIPE_TYPE_BITS. */
+void notrace ipipe_trace_special(unsigned char id, unsigned long v)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_SPECIAL | (id << IPIPE_TYPE_BITS),
+	              __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, v);
+}
+EXPORT_SYMBOL(ipipe_trace_special);
+/* prio goes into the type bits above IPIPE_TYPE_BITS, pid into the value. */
+void notrace ipipe_trace_pid(pid_t pid, short prio)
+{
+	if (!ipipe_trace_enable)
+		return;
+	__ipipe_trace(IPIPE_TRACE_PID | (prio << IPIPE_TYPE_BITS),
+	              __BUILTIN_RETURN_ADDRESS0,
+	              __BUILTIN_RETURN_ADDRESS1, pid);
+}
+EXPORT_SYMBOL(ipipe_trace_pid);
+/*
+ * Reset the max path of every cpu. Stops at the first path that is
+ * locked for dumping and returns -EBUSY in that case, 0 otherwise.
+ */
+int ipipe_trace_max_reset(void)
+{
+	int cpu_id;
+	unsigned long flags;
+	struct ipipe_trace_path *path;
+	int ret = 0;
+
+	flags = __ipipe_global_path_lock();
+
+	for_each_cpu(cpu_id) {
+		path = &trace_paths[cpu_id][max_path[cpu_id]];
+
+		if (path->dump_lock) {
+			ret = -EBUSY;
+			break;
+		}
+
+		path->begin     = -1;
+		path->end       = -1;
+		path->trace_pos = 0;
+		path->length    = 0;
+	}
+
+	__ipipe_global_path_unlock(flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(ipipe_trace_max_reset);
+
+int ipipe_trace_frozen_reset(void) +{ + int cpu_id; + unsigned long flags; + struct ipipe_trace_path *path; + int ret = 0; + + flags = __ipipe_global_path_lock(); + + for_each_cpu(cpu_id) { + path = &trace_paths[cpu_id][frozen_path[cpu_id]]; + + if (path->dump_lock) { + ret = -EBUSY; + break; + } + + path->begin = -1; + path->end = -1; + path->trace_pos = 0; + path->length = 0; + } + + __ipipe_global_path_unlock(flags); + + return ret; +} +EXPORT_SYMBOL(ipipe_trace_frozen_reset); + +void ipipe_trace_panic_freeze(void) +{ + unsigned long flags; + int cpu_id; + + ipipe_trace_enable = 0; + local_irq_save_hw_notrace(flags); + + cpu_id = ipipe_processor_id(); + + panic_path = &trace_paths[cpu_id][active_path[cpu_id]]; + + local_irq_restore_hw(flags); +} +EXPORT_SYMBOL(ipipe_trace_panic_freeze); + +static void +__ipipe_get_task_info(char *task_info, struct ipipe_trace_point *point, + int trylock) +{ + struct task_struct *task = NULL; + char buf[8]; + int i; + int locked = 1; + + if (trylock && !read_trylock(&tasklist_lock)) + locked = 0; + else + read_lock(&tasklist_lock); + + if (locked) + task = find_task_by_pid((pid_t)point->v); + + if (task) + strncpy(task_info, task->comm, 11); + else + strcpy(task_info, "--"); + + if (locked) + read_unlock(&tasklist_lock); + + for (i = strlen(task_info); i < 11; i++) + task_info[i] = ' '; + + sprintf(buf, " %d ", point->type >> IPIPE_TYPE_BITS); + strcpy(task_info + (11 - strlen(buf)), buf); +} + +void ipipe_trace_panic_dump(void) +{ + int cnt = back_trace; + int start, pos; + char task_info[12]; + + printk("I-pipe tracer log (%d points):\n", cnt); + + start = pos = WRAP_POINT_NO(panic_path->trace_pos-1); + + while (cnt-- > 0) { + struct ipipe_trace_point *point = &panic_path->point[pos]; + long time; + char buf[16]; + + if (!point->eip) + printk("--\n"); + else { + __ipipe_trace_point_type(buf, point); + printk(buf); + + switch (point->type & IPIPE_TYPE_MASK) { + case IPIPE_TRACE_FUNC: + printk(" "); + break; + + case 
IPIPE_TRACE_PID: + __ipipe_get_task_info(task_info, + point, 1); + printk(task_info); + break; + + default: + printk("0x%08lx ", point->v); + } + + time = __ipipe_signed_tsc2us(point->timestamp - + panic_path->point[start].timestamp); + printk(" %5ld ", time); + + __ipipe_print_symname(NULL, point->eip); + printk(" ("); + __ipipe_print_symname(NULL, point->parent_eip); + printk(")\n"); + } + pos = WRAP_POINT_NO(pos - 1); + } +} +EXPORT_SYMBOL(ipipe_trace_panic_dump); + + +/* --- /proc output --- */ + +static notrace int __ipipe_in_critical_trpath(long point_no) +{ + return ((WRAP_POINT_NO(point_no-print_path->begin) < + WRAP_POINT_NO(print_path->end-print_path->begin)) || + ((print_path->end == print_path->begin) && + (WRAP_POINT_NO(point_no-print_path->end) > + print_post_trace))); +} + +static long __ipipe_signed_tsc2us(long long tsc) +{ + unsigned long long abs_tsc; + long us; + + /* ipipe_tsc2us works on unsigned => handle sign separately */ + abs_tsc = (tsc >= 0) ? tsc : -tsc; + us = ipipe_tsc2us(abs_tsc); + if (tsc < 0) + return -us; + else + return us; +} + +static void +__ipipe_trace_point_type(char *buf, struct ipipe_trace_point *point) +{ + switch (point->type & IPIPE_TYPE_MASK) { + case IPIPE_TRACE_FUNC: + strcpy(buf, "func "); + break; + + case IPIPE_TRACE_BEGIN: + strcpy(buf, "begin "); + break; + + case IPIPE_TRACE_END: + strcpy(buf, "end "); + break; + + case IPIPE_TRACE_FREEZE: + strcpy(buf, "freeze "); + break; + + case IPIPE_TRACE_SPECIAL: + sprintf(buf, "(0x%02x) ", + point->type >> IPIPE_TYPE_BITS); + break; + + case IPIPE_TRACE_PID: + sprintf(buf, "[%5d] ", (pid_t)point->v); + break; + } +} + +static void +__ipipe_print_pathmark(struct seq_file *m, struct ipipe_trace_point *point) +{ + char mark = ' '; + int point_no = point - print_path->point; + int i; + + if (print_path->end == point_no) + mark = '<'; + else if (print_path->begin == point_no) + mark = '>'; + else if (__ipipe_in_critical_trpath(point_no)) + mark = ':'; + seq_printf(m, "%c%c", 
mark, + (point->flags & IPIPE_TFLG_HWIRQ_OFF) ? '|' : ' '); + + if (!verbose_trace) + return; + + for (i = IPIPE_TFLG_DOMSTATE_BITS; i >= 0; i--) + seq_printf(m, "%c", + (IPIPE_TFLG_CURRENT_DOMAIN(point) == i) ? + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? + '#' : '+') : + (IPIPE_TFLG_DOMAIN_STALLED(point, i) ? '*' : ' ')); +} + +static void +__ipipe_print_delay(struct seq_file *m, struct ipipe_trace_point *point) +{ + unsigned long delay = 0; + int next; + char *mark = " "; + + next = WRAP_POINT_NO(point+1 - print_path->point); + + if (next != print_path->trace_pos) + delay = ipipe_tsc2ns(print_path->point[next].timestamp - + point->timestamp); + + if (__ipipe_in_critical_trpath(point - print_path->point)) { + if (delay > IPIPE_DELAY_WARN) + mark = "! "; + else if (delay > IPIPE_DELAY_NOTE) + mark = "+ "; + } + seq_puts(m, mark); + + if (verbose_trace) + seq_printf(m, "%3lu.%03lu%c ", delay/1000, delay%1000, + (point->flags & IPIPE_TFLG_NMI_HIT) ? 'N' : ' '); + else + seq_puts(m, " "); +} + +static void __ipipe_print_symname(struct seq_file *m, unsigned long eip) +{ + char namebuf[KSYM_NAME_LEN+1]; + unsigned long size, offset; + const char *sym_name; + char *modname; + + sym_name = kallsyms_lookup(eip, &size, &offset, &modname, namebuf); + + /* printing to /proc? 
*/ + if (m) { + if (sym_name) { + if (verbose_trace) { + seq_printf(m, "%s+0x%lx", sym_name, offset); + if (modname) + seq_printf(m, " [%s]", modname); + } else + seq_puts(m, sym_name); + } else + seq_printf(m, "<%08lx>", eip); + } else { + /* panic dump */ + if (sym_name) { + printk("%s+0x%lx", sym_name, offset); + if (modname) + printk(" [%s]", modname); + } + } +} + +#if defined(CONFIG_XENO_OPT_DEBUG) || defined(CONFIG_DEBUG_PREEMPT) +static void __ipipe_print_dbgwarning(struct seq_file *m) +{ + seq_puts(m, "\n******** WARNING ********\n" + "The following debugging options will increase the observed " + "latencies:\n" +#ifdef CONFIG_XENO_OPT_DEBUG + " o CONFIG_XENO_OPT_DEBUG\n" +#endif /* CONFIG_XENO_OPT_DEBUG */ +#ifdef CONFIG_XENO_OPT_DEBUG_QUEUES + " o CONFIG_XENO_OPT_DEBUG_QUEUES (very costly)\n" +#endif /* CONFIG_XENO_OPT_DEBUG */ +#ifdef CONFIG_DEBUG_PREEMPT + " o CONFIG_DEBUG_PREEMPT\n" +#endif /* CONFIG_DEBUG_PREEMPT */ + "\n"); +} +#else /* !WARN_ON_DEBUGGING_LATENCIES */ +# define __ipipe_print_dbgwarning(m) +#endif /* WARN_ON_DEBUGGING_LATENCIES */ + +static void __ipipe_print_headline(struct seq_file *m) +{ + if (verbose_trace) { + const char *name[4] = { [0 ... 3] = "" }; + struct list_head *pos; + int i = 0; + + list_for_each_prev(pos, &__ipipe_pipeline) { + struct ipipe_domain *ipd = + list_entry(pos, struct ipipe_domain, p_link); + + name[i] = ipd->name; + if (++i > 3) + break; + } + + seq_printf(m, + " +----- Hard IRQs ('|': locked)\n" + " |+---- %s\n" + " ||+--- %s\n" + " |||+-- %s\n" + " ||||+- %s%s\n" + " ||||| +---------- " + "Delay flag ('+': > %d us, '!': > %d us)\n" + " ||||| | +- " + "NMI noise ('N')\n" + " ||||| | |\n" + " Type User Val. Time Delay Function " + "(Parent)\n", + name[3], name[2], name[1], name[0], + name[0] ? 
" ('*': domain stalled, '+': current, " + "'#': current+stalled)" : "", + IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); + } else + seq_printf(m, + " +--------------- Hard IRQs ('|': locked)\n" + " | +- Delay flag " + "('+': > %d us, '!': > %d us)\n" + " | |\n" + " Type Time Function (Parent)\n", + IPIPE_DELAY_NOTE/1000, IPIPE_DELAY_WARN/1000); +} + +static void *__ipipe_max_prtrace_start(struct seq_file *m, loff_t *pos) +{ + loff_t n = *pos; + + down(&out_mutex); + + if (!n) { + struct ipipe_trace_path *path; + unsigned long length_usecs; + int points, i; + unsigned long flags; + + /* protect against max_path/frozen_path updates while we + * haven't locked our target path, also avoid recursively + * taking global_path_lock from NMI context */ + flags = __ipipe_global_path_lock(); + + /* find the longest of all per-cpu paths */ + print_path = NULL; + for_each_online_cpu(i) { + path = &trace_paths[i][max_path[i]]; + if ((print_path == NULL) || + (path->length > print_path->length)) + print_path = path; + } + print_path->dump_lock = 1; + + __ipipe_global_path_unlock(flags); + + /* does this path actually contain data? 
*/ + if (print_path->end == print_path->begin) + return NULL; + + /* number of points inside the critical path */ + points = WRAP_POINT_NO(print_path->end-print_path->begin+1); + + /* pre- and post-tracing length, post-trace length was frozen + in __ipipe_trace, pre-trace may have to be reduced due to + buffer overrun */ + print_pre_trace = pre_trace; + print_post_trace = WRAP_POINT_NO(print_path->trace_pos - + print_path->end - 1); + if (points+pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) + print_pre_trace = IPIPE_TRACE_POINTS - 1 - points - + print_post_trace; + + length_usecs = ipipe_tsc2us(print_path->length); + seq_printf(m, "I-pipe worst-case tracing service on %s/ipipe-%s\n" + "------------------------------------------------------------\n", + UTS_RELEASE, IPIPE_ARCH_STRING); + __ipipe_print_dbgwarning(m); + seq_printf(m, "Begin: %lld cycles, Trace Points: %d (-%d/+%d), " + "Length: %lu us\n\n", + print_path->point[print_path->begin].timestamp, + points, print_pre_trace, print_post_trace, length_usecs); + __ipipe_print_headline(m); + } + + /* check if we are inside the trace range */ + if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + + print_pre_trace + print_post_trace)) + return NULL; + + /* return the next point to be shown */ + return &print_path->point[WRAP_POINT_NO(print_path->begin - + print_pre_trace + n)]; +} + +static void *__ipipe_prtrace_next(struct seq_file *m, void *p, loff_t *pos) +{ + loff_t n = ++*pos; + + /* check if we are inside the trace range with the next entry */ + if (n >= WRAP_POINT_NO(print_path->end - print_path->begin + 1 + + print_pre_trace + print_post_trace)) + return NULL; + + /* return the next point to be shown */ + return &print_path->point[WRAP_POINT_NO(print_path->begin - + print_pre_trace + *pos)]; +} + +static void __ipipe_prtrace_stop(struct seq_file *m, void *p) +{ + if (print_path) + print_path->dump_lock = 0; + up(&out_mutex); +} + +static int __ipipe_prtrace_show(struct seq_file *m, void 
*p) +{ + long time; + struct ipipe_trace_point *point = p; + char buf[16]; + + if (!point->eip) { + seq_puts(m, "--\n"); + return 0; + } + + __ipipe_print_pathmark(m, point); + __ipipe_trace_point_type(buf, point); + seq_puts(m, buf); + if (verbose_trace) + switch (point->type & IPIPE_TYPE_MASK) { + case IPIPE_TRACE_FUNC: + seq_puts(m, " "); + break; + + case IPIPE_TRACE_PID: + __ipipe_get_task_info(buf, point, 0); + seq_puts(m, buf); + break; + + default: + seq_printf(m, "0x%08lx ", point->v); + } + + time = __ipipe_signed_tsc2us(point->timestamp - + print_path->point[print_path->begin].timestamp); + seq_printf(m, "%5ld", time); + + __ipipe_print_delay(m, point); + __ipipe_print_symname(m, point->eip); + seq_puts(m, " ("); + __ipipe_print_symname(m, point->parent_eip); + seq_puts(m, ")\n"); + + return 0; +} + +static struct seq_operations __ipipe_max_ptrace_ops = { + .start = __ipipe_max_prtrace_start, + .next = __ipipe_prtrace_next, + .stop = __ipipe_prtrace_stop, + .show = __ipipe_prtrace_show +}; + +static int __ipipe_max_prtrace_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &__ipipe_max_ptrace_ops); +} + +static ssize_t +__ipipe_max_reset(struct file *file, const char __user *pbuffer, + size_t count, loff_t *data) +{ + down(&out_mutex); + ipipe_trace_max_reset(); + up(&out_mutex); + + return count; +} + +struct file_operations __ipipe_max_prtrace_fops = { + .open = __ipipe_max_prtrace_open, + .read = seq_read, + .write = __ipipe_max_reset, + .llseek = seq_lseek, + .release = seq_release, +}; + +static void *__ipipe_frozen_prtrace_start(struct seq_file *m, loff_t *pos) +{ + loff_t n = *pos; + + down(&out_mutex); + + if (!n) { + struct ipipe_trace_path *path; + int i; + unsigned long flags; + + /* protect against max_path/frozen_path updates while we + * haven't locked our target path, also avoid recursively + * taking global_path_lock from NMI context */ + flags = __ipipe_global_path_lock(); + + /* find the first of all per-cpu frozen 
paths */ + print_path = NULL; + for_each_online_cpu(i) { + path = &trace_paths[i][frozen_path[i]]; + if (path->end >= 0) + print_path = path; + } + if (print_path) + print_path->dump_lock = 1; + + __ipipe_global_path_unlock(flags); + + if (!print_path) + return NULL; + + /* back- and post-tracing length, post-trace length was frozen + in __ipipe_trace, back-trace may have to be reduced due to + buffer overrun */ + print_pre_trace = back_trace-1; /* subtract freeze point */ + print_post_trace = WRAP_POINT_NO(print_path->trace_pos - + print_path->end - 1); + if (1+print_pre_trace+print_post_trace > IPIPE_TRACE_POINTS - 1) /* clamp against the back-trace length computed above, not the worst-case pre_trace setting */ + print_pre_trace = IPIPE_TRACE_POINTS - 2 - + print_post_trace; + + seq_printf(m, "I-pipe frozen back-tracing service on %s/ipipe-%s\n" + "------------------------------------------------------" + "------\n", + UTS_RELEASE, IPIPE_ARCH_STRING); + __ipipe_print_dbgwarning(m); + seq_printf(m, "Freeze: %lld cycles, Trace Points: %d (+%d)\n\n", + print_path->point[print_path->begin].timestamp, + print_pre_trace+1, print_post_trace); + __ipipe_print_headline(m); + } + + /* check if we are inside the trace range */ + if (n >= print_pre_trace + 1 + print_post_trace) + return NULL; + + /* return the next point to be shown */ + return &print_path->point[WRAP_POINT_NO(print_path->begin- + print_pre_trace+n)]; +} + +static struct seq_operations __ipipe_frozen_ptrace_ops = { + .start = __ipipe_frozen_prtrace_start, + .next = __ipipe_prtrace_next, + .stop = __ipipe_prtrace_stop, + .show = __ipipe_prtrace_show +}; + +static int __ipipe_frozen_prtrace_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &__ipipe_frozen_ptrace_ops); +} + +static ssize_t +__ipipe_frozen_ctrl(struct file *file, const char __user *pbuffer, + size_t count, loff_t *data) +{ + char *end, buf[16]; + int val; + int n; + + n = (count > sizeof(buf) - 1) ?
sizeof(buf) - 1 : count; + + if (copy_from_user(buf, pbuffer, n)) + return -EFAULT; + + buf[n] = '\0'; + val = simple_strtol(buf, &end, 0); + + if (((*end != '\0') && !isspace(*end)) || (val < 0)) + return -EINVAL; + + down(&out_mutex); + ipipe_trace_frozen_reset(); + if (val > 0) + ipipe_trace_freeze(-1); + up(&out_mutex); + + return count; +} + +struct file_operations __ipipe_frozen_prtrace_fops = { + .open = __ipipe_frozen_prtrace_open, + .read = seq_read, + .write = __ipipe_frozen_ctrl, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __ipipe_rd_proc_val(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + + len = sprintf(page, "%u\n", *(int *)data); + len -= off; /* len now holds the bytes still unread past the caller's offset */ + if (len <= count) /* signal EOF only when everything left fits into this read */ + *eof = 1; + *start = page + off; + if (len > count) + len = count; + if (len < 0) + len = 0; + + return len; +} + +static int __ipipe_wr_proc_val(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char *end, buf[16]; + int val; + int n; + + n = (count > sizeof(buf) - 1) ?
sizeof(buf) - 1 : count; + + if (copy_from_user(buf, buffer, n)) + return -EFAULT; + + buf[n] = '\0'; + val = simple_strtol(buf, &end, 0); + + if (((*end != '\0') && !isspace(*end)) || (val < 0)) + return -EINVAL; + + down(&out_mutex); + *(int *)data = val; + up(&out_mutex); + + return count; +} + +extern struct proc_dir_entry *ipipe_proc_root; + +static void __init +__ipipe_create_trace_proc_val(struct proc_dir_entry *trace_dir, + const char *name, int *value_ptr) +{ + struct proc_dir_entry *entry; + + entry = create_proc_entry(name, 0644, trace_dir); + if (entry) { + entry->data = value_ptr; + entry->read_proc = __ipipe_rd_proc_val; + entry->write_proc = __ipipe_wr_proc_val; + entry->owner = THIS_MODULE; + } +} + +void __init __ipipe_init_tracer(void) +{ + struct proc_dir_entry *trace_dir; + struct proc_dir_entry *entry; +#ifdef CONFIG_IPIPE_TRACE_VMALLOC + int cpu, path; + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + trace_paths[cpu] = vmalloc( + sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS); + if (!trace_paths[cpu]) { /* test this CPU's allocation; the array itself is never NULL */ + printk(KERN_ERR "I-pipe: " + "insufficient memory for trace buffer.\n"); + return; + } + memset(trace_paths[cpu], 0, + sizeof(struct ipipe_trace_path) * IPIPE_TRACE_PATHS); + for (path = 0; path < IPIPE_TRACE_PATHS; path++) { + trace_paths[cpu][path].begin = -1; + trace_paths[cpu][path].end = -1; + } + } + ipipe_trace_enable = CONFIG_IPIPE_TRACE_ENABLE_VALUE; +#endif /* CONFIG_IPIPE_TRACE_VMALLOC */ + + trace_dir = create_proc_entry("trace", S_IFDIR, ipipe_proc_root); + + entry = create_proc_entry("max", 0644, trace_dir); + if (entry) + entry->proc_fops = &__ipipe_max_prtrace_fops; + + entry = create_proc_entry("frozen", 0644, trace_dir); + if (entry) + entry->proc_fops = &__ipipe_frozen_prtrace_fops; + + __ipipe_create_trace_proc_val(trace_dir, "pre_trace_points", + &pre_trace); + __ipipe_create_trace_proc_val(trace_dir, "post_trace_points", + &post_trace); + __ipipe_create_trace_proc_val(trace_dir, "back_trace_points", + &back_trace); +
__ipipe_create_trace_proc_val(trace_dir, "verbose", + &verbose_trace); + __ipipe_create_trace_proc_val(trace_dir, "enable", + &ipipe_trace_enable); +} Index: linux-2.6.16.33/kernel/irq/handle.c =================================================================== --- linux-2.6.16.33.orig/kernel/irq/handle.c +++ linux-2.6.16.33/kernel/irq/handle.c @@ -81,6 +81,17 @@ fastcall int handle_IRQ_event(unsigned i { int ret, retval = 0, status = 0; +#ifdef CONFIG_IPIPE + /* + * If processing a timer tick, pass the original regs as + * collected during preemption and not our phony - always + * kernel-originated - frame, so that we don't wreck the + * profiling code. + */ + if (__ipipe_tick_irq == irq) + regs = __ipipe_tick_regs + smp_processor_id(); +#endif /* CONFIG_IPIPE */ + if (!(action->flags & SA_INTERRUPT)) local_irq_enable(); @@ -117,16 +128,20 @@ fastcall unsigned int __do_IRQ(unsigned /* * No locking required for CPU-local interrupts: */ +#ifndef CONFIG_IPIPE if (desc->handler->ack) desc->handler->ack(irq); +#endif /* !CONFIG_IPIPE */ action_ret = handle_IRQ_event(irq, regs, desc->action); desc->handler->end(irq); return 1; } spin_lock(&desc->lock); +#ifndef CONFIG_IPIPE if (desc->handler->ack) desc->handler->ack(irq); +#endif /* !CONFIG_IPIPE */ /* * REPLAY is when Linux resends an IRQ that was dropped earlier * WAITING is used by probe to mark irqs that are being tested Index: linux-2.6.16.33/kernel/printk.c =================================================================== --- linux-2.6.16.33.orig/kernel/printk.c +++ linux-2.6.16.33/kernel/printk.c @@ -511,6 +511,78 @@ __attribute__((weak)) unsigned long long * printf(3) */ +#ifdef CONFIG_IPIPE + +static ipipe_spinlock_t __ipipe_printk_lock = IPIPE_SPIN_LOCK_UNLOCKED; + +static int __ipipe_printk_fill; + +static char __ipipe_printk_buf[__LOG_BUF_LEN]; + +void __ipipe_flush_printk (unsigned virq, void *cookie) +{ + char *p = __ipipe_printk_buf; + int len, lmax, out = 0; + unsigned long flags; + + goto start; + + 
do { + spin_unlock_irqrestore_hw(&__ipipe_printk_lock,flags); + start: + lmax = __ipipe_printk_fill; + while (out < lmax) { + len = strlen(p) + 1; + printk("%s",p); + p += len; + out += len; + } + spin_lock_irqsave_hw(&__ipipe_printk_lock,flags); + } + while (__ipipe_printk_fill != lmax); + + __ipipe_printk_fill = 0; + + spin_unlock_irqrestore_hw(&__ipipe_printk_lock,flags); +} + +asmlinkage int printk(const char *fmt, ...) +{ + int r, fbytes, oldcount; + unsigned long flags; + va_list args; + + va_start(args, fmt); + + if (ipipe_current_domain == ipipe_root_domain || + test_bit(IPIPE_SPRINTK_FLAG,&ipipe_current_domain->flags) || + oops_in_progress) { + r = vprintk(fmt, args); + goto out; + } + + spin_lock_irqsave_hw(&__ipipe_printk_lock,flags); + + oldcount = __ipipe_printk_fill; + fbytes = __LOG_BUF_LEN - oldcount; + + if (fbytes > 1) { + r = vscnprintf(__ipipe_printk_buf + __ipipe_printk_fill, + fbytes, fmt, args) + 1; /* account for the null byte */ + __ipipe_printk_fill += r; + } else + r = 0; + + spin_unlock_irqrestore_hw(&__ipipe_printk_lock,flags); + + if (oldcount == 0) + ipipe_trigger_irq(__ipipe_printk_virq); +out: + va_end(args); + + return r; +} +#else /* !CONFIG_IPIPE */ asmlinkage int printk(const char *fmt, ...) { va_list args; @@ -522,6 +594,7 @@ asmlinkage int printk(const char *fmt, . 
return r; } +#endif /* CONFIG_IPIPE */ /* cpu currently holding logbuf_lock */ static volatile unsigned int printk_cpu = UINT_MAX; Index: linux-2.6.16.33/kernel/sched.c =================================================================== --- linux-2.6.16.33.orig/kernel/sched.c +++ linux-2.6.16.33/kernel/sched.c @@ -1169,7 +1169,7 @@ static int try_to_wake_up(task_t *p, uns rq = task_rq_lock(p, &flags); old_state = p->state; - if (!(old_state & state)) + if (!(old_state & state) || (old_state & TASK_NOWAKEUP)) goto out; if (p->array) @@ -1571,6 +1571,8 @@ asmlinkage void schedule_tail(task_t *pr #endif if (current->set_child_tid) put_user(current->pid, current->set_child_tid); + + ipipe_init_notify(current); } /* @@ -2895,13 +2897,17 @@ asmlinkage void __sched schedule(void) unsigned long run_time; int cpu, idx, new_prio; +#ifdef CONFIG_IPIPE + if (unlikely(!ipipe_root_domain_p)) + return; +#endif /* CONFIG_IPIPE */ /* * Test if we are atomic. Since do_exit() needs to call into * schedule() atomically, we ignore that path for now. * Otherwise, whine if we are scheduling when we should not be. 
*/ if (likely(!current->exit_state)) { - if (unlikely(in_atomic())) { + if (unlikely(in_atomic() && !(current->state & TASK_ATOMICSWITCH))) { printk(KERN_ERR "scheduling while atomic: " "%s/0x%08x/%d\n", current->comm, preempt_count(), current->pid); @@ -2910,8 +2916,13 @@ asmlinkage void __sched schedule(void) } profile_hit(SCHED_PROFILING, __builtin_return_address(0)); + if (unlikely(current->state & TASK_ATOMICSWITCH)) { + current->state &= ~TASK_ATOMICSWITCH; + goto need_resched_nodisable; + } need_resched: preempt_disable(); +need_resched_nodisable: prev = current; release_kernel_lock(prev); need_resched_nonpreemptible: @@ -3050,6 +3061,8 @@ switch_tasks: prepare_task_switch(rq, next); prev = context_switch(rq, prev, next); barrier(); + if (task_hijacked(prev)) + return; /* * this_rq must be evaluated again because prev may have moved * CPUs since it called schedule(), thus the 'rq' on its stack @@ -3063,7 +3076,7 @@ switch_tasks: if (unlikely(reacquire_kernel_lock(prev) < 0)) goto need_resched_nonpreemptible; preempt_enable_no_resched(); - if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) + if (unlikely(test_thread_flag(TIF_NEED_RESCHED) && ipipe_root_domain_p)) goto need_resched; } @@ -3082,6 +3095,11 @@ asmlinkage void __sched preempt_schedule struct task_struct *task = current; int saved_lock_depth; #endif +#ifdef CONFIG_IPIPE + /* Do not reschedule over non-Linux domains. */ + if (unlikely(!ipipe_root_domain_p)) + return; +#endif /* CONFIG_IPIPE */ /* * If there is a non-zero preempt_count or interrupts are disabled, * we do not want to preempt the current task. Just return.. 
@@ -3720,6 +3738,7 @@ recheck: deactivate_task(p, rq); oldprio = p->prio; __setscheduler(p, policy, param->sched_priority); + ipipe_setsched_notify(p); if (array) { __activate_task(p, rq); /* @@ -6189,3 +6208,50 @@ void set_curr_task(int cpu, task_t *p) } #endif + +#ifdef CONFIG_IPIPE + +int ipipe_setscheduler_root (struct task_struct *p, int policy, int prio) +{ + prio_array_t *array; + unsigned long flags; + runqueue_t *rq; + int oldprio; + + rq = task_rq_lock(p, &flags); + array = p->array; + if (array) + deactivate_task(p, rq); + oldprio = p->prio; + __setscheduler(p, policy, prio); + if (array) { + __activate_task(p, rq); + if (task_running(rq, p)) { + if (p->prio > oldprio) + resched_task(rq->curr); + } else if (TASK_PREEMPTS_CURR(p, rq)) + resched_task(rq->curr); + } + task_rq_unlock(rq, &flags); + + return 0; +} + +EXPORT_SYMBOL(ipipe_setscheduler_root); + +int ipipe_reenter_root (struct task_struct *prev, int policy, int prio) +{ + finish_task_switch(this_rq(), prev); + if (reacquire_kernel_lock(current) < 0) + ; + preempt_enable_no_resched(); + + if (current->policy != policy || current->rt_priority != prio) + return ipipe_setscheduler_root(current,policy,prio); + + return 0; +} + +EXPORT_SYMBOL(ipipe_reenter_root); + +#endif /* CONFIG_IPIPE */ Index: linux-2.6.16.33/kernel/signal.c =================================================================== --- linux-2.6.16.33.orig/kernel/signal.c +++ linux-2.6.16.33/kernel/signal.c @@ -604,6 +604,7 @@ void signal_wake_up(struct task_struct * unsigned int mask; set_tsk_thread_flag(t, TIF_SIGPENDING); + ipipe_sigwake_notify(t); /* TIF_SIGPENDING must be set first. */ /* * For SIGKILL, we want to wake it up in the stopped/traced case. Index: linux-2.6.16.33/lib/Kconfig.debug =================================================================== --- linux-2.6.16.33.orig/lib/Kconfig.debug +++ linux-2.6.16.33/lib/Kconfig.debug @@ -23,6 +23,8 @@ config MAGIC_SYSRQ keys are documented in . 
Don't say Y unless you really know what this hack does. +source "kernel/ipipe/Kconfig.debug" + config DEBUG_KERNEL bool "Kernel debugging" help Index: linux-2.6.16.33/lib/smp_processor_id.c =================================================================== --- linux-2.6.16.33.orig/lib/smp_processor_id.c +++ linux-2.6.16.33/lib/smp_processor_id.c @@ -13,6 +13,11 @@ unsigned int debug_smp_processor_id(void int this_cpu = raw_smp_processor_id(); cpumask_t this_mask; +#ifdef CONFIG_IPIPE + if (ipipe_current_domain != ipipe_root_domain) + return this_cpu; +#endif /* CONFIG_IPIPE */ + if (likely(preempt_count)) goto out; Index: linux-2.6.16.33/lib/spinlock_debug.c =================================================================== --- linux-2.6.16.33.orig/lib/spinlock_debug.c +++ linux-2.6.16.33/lib/spinlock_debug.c @@ -10,6 +10,7 @@ #include #include #include +#include static void spin_bug(spinlock_t *lock, const char *msg) { @@ -96,6 +97,8 @@ void _raw_spin_lock(spinlock_t *lock) debug_spin_lock_after(lock); } +EXPORT_SYMBOL(_raw_spin_lock); + int _raw_spin_trylock(spinlock_t *lock) { int ret = __raw_spin_trylock(&lock->raw_lock); @@ -111,12 +114,16 @@ int _raw_spin_trylock(spinlock_t *lock) return ret; } +EXPORT_SYMBOL(_raw_spin_trylock); + void _raw_spin_unlock(spinlock_t *lock) { debug_spin_unlock(lock); __raw_spin_unlock(&lock->raw_lock); } +EXPORT_SYMBOL(_raw_spin_unlock); + static void rwlock_bug(rwlock_t *lock, const char *msg) { static long print_once = 1; @@ -168,6 +175,8 @@ void _raw_read_lock(rwlock_t *lock) __raw_read_lock(&lock->raw_lock); } +EXPORT_SYMBOL(_raw_read_lock); + int _raw_read_trylock(rwlock_t *lock) { int ret = __raw_read_trylock(&lock->raw_lock); @@ -181,12 +190,16 @@ int _raw_read_trylock(rwlock_t *lock) return ret; } +EXPORT_SYMBOL(_raw_read_trylock); + void _raw_read_unlock(rwlock_t *lock) { RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); __raw_read_unlock(&lock->raw_lock); } +EXPORT_SYMBOL(_raw_read_unlock); + static 
inline void debug_write_lock_before(rwlock_t *lock) { RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); @@ -243,6 +256,8 @@ void _raw_write_lock(rwlock_t *lock) debug_write_lock_after(lock); } +EXPORT_SYMBOL(_raw_write_lock); + int _raw_write_trylock(rwlock_t *lock) { int ret = __raw_write_trylock(&lock->raw_lock); @@ -258,8 +273,12 @@ int _raw_write_trylock(rwlock_t *lock) return ret; } +EXPORT_SYMBOL(_raw_write_trylock); + void _raw_write_unlock(rwlock_t *lock) { debug_write_unlock(lock); __raw_write_unlock(&lock->raw_lock); } + +EXPORT_SYMBOL(_raw_write_unlock); Index: linux-2.6.16.33/mm/vmalloc.c =================================================================== --- linux-2.6.16.33.orig/mm/vmalloc.c +++ linux-2.6.16.33/mm/vmalloc.c @@ -19,6 +19,7 @@ #include #include +#include DEFINE_RWLOCK(vmlist_lock); @@ -148,10 +149,14 @@ int map_vm_area(struct vm_struct *area, BUG_ON(addr >= end); pgd = pgd_offset_k(addr); do { + pgd_t oldpgd; + memcpy(&oldpgd,pgd,sizeof(pgd_t)); next = pgd_addr_end(addr, end); err = vmap_pud_range(pgd, addr, next, prot, pages); if (err) break; + if (pgd_val(oldpgd) != pgd_val(*pgd)) + set_pgdir(addr, *pgd); } while (pgd++, addr = next, addr != end); flush_cache_vmap((unsigned long) area->addr, end); return err; --------------020001080902050609010902--