From mboxrd@z Thu Jan 1 00:00:00 1970 From: Mark Hounschell Subject: Re: About IO-APIC - General question Date: Fri, 03 May 2002 18:13:01 -0400 Sender: linux-smp-owner@vger.kernel.org Message-ID: <3CD30B6D.696FF931@cfl.rr.com> References: <3CBBC3D6.1947D011@phys.uni-paderborn.de> <3CBC0F3D.3010806@tzi.org> <3CD2BA87.D03FB34C@phys.uni-paderborn.de> Reply-To: dmarkh@cfl.rr.com Mime-Version: 1.0 Content-Transfer-Encoding: 7bit Return-path: List-Id: Content-Type: text/plain; charset="us-ascii" To: "Peter H. Koenig" Cc: linux-smp "Peter H. Koenig" wrote: > > Hello, > > Klaas, I would like to thank you for the data you provided. > > >From your first paragraph I take that there is the possibility of specifying > which processor(s) handle(s) interrupts manually. > > Klaas Zweck wrote: > > i made some test on a linux-smp machine(kernel 2.4.9). > > i had one process just doing some calculation on prim numbers. > > i locked this to cpu 1 and disabled all interrupts on cpu 1. > > ( in fact i routed all to cpu 0 ) > > then i started a make -j4 bzImage job in a remote > > shell to produce file i/o and network interrupts. > > i restricted all processes except the one doing the calculation > > from cpu 1 and delivered all interrupts only to cpu 0. > > Is it neccessary specifying this for getting interrupt sharing working ? > > My interrupts statistics table shows that the interrrupts are handled by > one > CPU solely: > > $ cat /proc/interrupts > CPU0 CPU1 > 0: 84363 0 IO-APIC-edge timer > 1: 2 0 IO-APIC-edge keyboard > 2: 0 0 XT-PIC cascade > 8: 1 0 IO-APIC-edge rtc > 11: 1661 0 IO-APIC-level eth0 > 14: 8741 0 IO-APIC-edge ide0 > 15: 2 0 IO-APIC-edge ide1 > NMI: 0 0 > LOC: 84283 84281 > ERR: 0 > MIS: 0 > > This differs somehow from the (old) IO-APIC documentation where in the > case > of SMP-boards the IRQ load is shared between the processors. 
> > This problem occurs on a Supermicro P4DC6 using an unreleased bios > release > (without that so far the APIC has not been recognized at all), the MPS > specification set to version 1.1 in the bios using kernel 2.4.10, 2.4.17 > and > 2.4.18. > > As some of the applications we run are quite Network or IO-intensive, we > would like to take advantage of the IRQ sharing. > > Any suggestions ? > > Pete There is an irq_balance patch floating around that works on this mother board. I have 2 or 3 of these with the same symptom. It was resolved by this irq_balance patch. I believe it was written by Ingo. Here it is. There is another one for irq0 (timer), but you won't need it for this MB. --- linux/kernel/sched.c.orig Tue Feb 5 13:11:35 2002 +++ linux/kernel/sched.c Tue Feb 5 13:12:48 2002 @@ -118,6 +118,11 @@ #define can_schedule(p,cpu) \ ((p)->cpus_runnable & (p)->cpus_allowed & (1 << cpu)) +int idle_cpu(int cpu) +{ + return cpu_curr(cpu) == idle_task(cpu); +} + #else #define idle_task(cpu) (&init_task) --- linux/include/linux/sched.h.orig Tue Feb 5 13:13:09 2002 +++ linux/include/linux/sched.h Tue Feb 5 13:14:00 2002 @@ -144,6 +144,7 @@ extern void sched_init(void); extern void init_idle(void); +extern int idle_cpu(int cpu); extern void show_state(void); extern void cpu_init (void); extern void trap_init(void); --- linux/include/asm-i386/hardirq.h.orig Tue Feb 5 13:10:39 2002 +++ linux/include/asm-i386/hardirq.h Tue Feb 5 13:14:00 2002 @@ -12,6 +12,7 @@ unsigned int __local_bh_count; unsigned int __syscall_count; struct task_struct * __ksoftirqd_task; /* waitqueue is too large */ + unsigned long idle_timestamp; unsigned int __nmi_count; /* arch dependent */ } ____cacheline_aligned irq_cpustat_t; --- linux/arch/i386/kernel/io_apic.c.orig Tue Feb 5 13:10:37 2002 +++ linux/arch/i386/kernel/io_apic.c Tue Feb 5 13:15:23 2002 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -163,6 +164,86 @@ clear_IO_APIC_pin(apic, pin); } +static void 
set_ioapic_affinity (unsigned int irq, unsigned long mask) +{ + unsigned long flags; + + /* + * Only the first 8 bits are valid. + */ + mask = mask << 24; + spin_lock_irqsave(&ioapic_lock, flags); + __DO_ACTION(1, = mask, ) + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +#if CONFIG_SMP + +typedef struct { + unsigned int cpu; + unsigned long timestamp; +} ____cacheline_aligned irq_balance_t; + +static irq_balance_t irq_balance[NR_IRQS] __cacheline_aligned + = { [ 0 ... NR_IRQS-1 ] = { 1, 0 } }; + +extern unsigned long irq_affinity [NR_IRQS]; + +#endif + +#define IDLE_ENOUGH(cpu,now) \ + (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1)) + +#define IRQ_ALLOWED(cpu,allowed_mask) \ + ((1 << cpu) & (allowed_mask)) + +static unsigned long move(int curr_cpu, unsigned long allowed_mask, unsigned long now, int direction) +{ + int search_idle = 1; + int cpu = curr_cpu; + + goto inside; + + do { + if (unlikely(cpu == curr_cpu)) + search_idle = 0; +inside: + if (direction == 1) { + cpu++; + if (cpu >= smp_num_cpus) + cpu = 0; + } else { + cpu--; + if (cpu == -1) + cpu = smp_num_cpus-1; + } + } while (!IRQ_ALLOWED(cpu,allowed_mask) || + (search_idle && !IDLE_ENOUGH(cpu,now))); + + return cpu; +} + +static inline void balance_irq(int irq) +{ +#if CONFIG_SMP + irq_balance_t *entry = irq_balance + irq; + unsigned long now = jiffies; + + if (unlikely(entry->timestamp != now)) { + unsigned long allowed_mask; + int random_number; + + rdtscl(random_number); + random_number &= 1; + + allowed_mask = cpu_online_map & irq_affinity[irq]; + entry->timestamp = now; + entry->cpu = move(entry->cpu, allowed_mask, now, random_number); + set_ioapic_affinity(irq, 1 << entry->cpu); + } +#endif +} + /* * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to * specific CPU-side IRQs. @@ -653,8 +734,7 @@ } /* - * Set up the 8259A-master output pin as broadcast to all - * CPUs. 
+ * Set up the 8259A-master output pin: */ void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) { @@ -1174,6 +1254,7 @@ */ static void ack_edge_ioapic_irq(unsigned int irq) { + balance_irq(irq); if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) mask_IO_APIC_irq(irq); @@ -1213,6 +1294,7 @@ unsigned long v; int i; + balance_irq(irq); /* * It appears there is an erratum which affects at least version 0x11 * of I/O APIC (that's the 82093AA and cores integrated into various @@ -1268,19 +1350,6 @@ } static void mask_and_ack_level_ioapic_irq (unsigned int irq) { /* nothing */ } - -static void set_ioapic_affinity (unsigned int irq, unsigned long mask) -{ - unsigned long flags; - /* - * Only the first 8 bits are valid. - */ - mask = mask << 24; - - spin_lock_irqsave(&ioapic_lock, flags); - __DO_ACTION(1, = mask, ) - spin_unlock_irqrestore(&ioapic_lock, flags); -} /* * Level and edge triggered IO-APIC interrupts need different handling, --- linux/arch/i386/kernel/irq.c.orig Tue Feb 5 13:10:34 2002 +++ linux/arch/i386/kernel/irq.c Tue Feb 5 13:11:15 2002 @@ -1076,7 +1076,7 @@ static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; -static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL }; +unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL }; static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) -- Mark Hounschell dmarkh@cfl.rr.com