From mboxrd@z Thu Jan 1 00:00:00 1970 From: Mark Hounschell Subject: Re: WARNING: unexpected IO-APIC; all interrupts to one CPU on smp Date: Thu, 23 May 2002 12:33:55 -0400 Sender: linux-smp-owner@vger.kernel.org Message-ID: <3CED19F3.429EC4EA@cfl.rr.com> References: Reply-To: dmarkh@cfl.rr.com Mime-Version: 1.0 Content-Transfer-Encoding: 7bit Return-path: List-Id: Content-Type: text/plain; charset="us-ascii" To: "Schmitz, Dave R" Cc: "'linux-smp@vger.kernel.org'" "Schmitz, Dave R" wrote: > > Hello, > > I received the following boot message on several Supermicro motherboards > (P4DC6+, P4DCE+, P4DP6) which have among them two Intel chipsets > (860, E7500) when using both RedHat 7.1 and RedHat 7.2. > > <<< > WARNING: unexpected IO-APIC, please mail > to linux-smp@vger.kernel.org > >>> > > Results for RedHat 7.2 are attached for four of the PCs. > > Would this be related to the observation that on all machines > all interrupts go to CPU0 and none go to CPU1? > (We do a "cat /proc/interrupts" to see this.) > My colleagues using dual-CPU Tyan motherboards (Athlon?) > do not see this, and for slightly slower machines they > get slightly faster performance. > > Five dmesg logs for four machines are attached. > All four have: > dual Pentium IV Xeon 1.8GHz processors > Supermicro motherboards > RedHat Linux 7.2 with the included 2.4.7-10smp kernel. > except as noted below. I have a number of these motherboards with 1.7 GHz CPUs in them. They all have the "all interrupts on CPU0" problem, but there is a patch that I tried with success for this. It is an irq_balance patch; I don't remember where it came from. The only drawback was that once you apply it, you can no longer bind an IRQ to a particular processor if you want to. I had to back it out because my app and the PCI card we are developing require me to bind a process and its threads, plus the IRQ of the PCI card, to one processor and to force all other processes and all other IRQs onto the other CPUs. 
It's an emulation of an old proprietary real-time OS, and the card is a PCI card with 6 high-res timers and 8 external interrupts. The only way to get good interrupt latency/determinism numbers is to do the above. With this patch I could no longer set the IRQs to the processor I wanted. When not running the "app" I wanted IRQ balancing across the CPUs, and it worked OK for that. The patch is below and applies to 2.4.18. I don't see your other problem at all, even though the MPS version is set to 1.4 in the BIOS on all my boards. You should probably try setting it to 1.1 and see if it helps. --- linux/kernel/sched.c.orig Tue Feb 5 13:11:35 2002 +++ linux/kernel/sched.c Tue Feb 5 13:12:48 2002 @@ -118,6 +118,11 @@ #define can_schedule(p,cpu) \ ((p)->cpus_runnable & (p)->cpus_allowed & (1 << cpu)) +int idle_cpu(int cpu) +{ + return cpu_curr(cpu) == idle_task(cpu); +} + #else #define idle_task(cpu) (&init_task) --- linux/include/linux/sched.h.orig Tue Feb 5 13:13:09 2002 +++ linux/include/linux/sched.h Tue Feb 5 13:14:00 2002 @@ -144,6 +144,7 @@ extern void sched_init(void); extern void init_idle(void); +extern int idle_cpu(int cpu); extern void show_state(void); extern void cpu_init (void); extern void trap_init(void); --- linux/include/asm-i386/hardirq.h.orig Tue Feb 5 13:10:39 2002 +++ linux/include/asm-i386/hardirq.h Tue Feb 5 13:14:00 2002 @@ -12,6 +12,7 @@ unsigned int __local_bh_count; unsigned int __syscall_count; struct task_struct * __ksoftirqd_task; /* waitqueue is too large */ + unsigned long idle_timestamp; unsigned int __nmi_count; /* arch dependent */ } ____cacheline_aligned irq_cpustat_t; --- linux/arch/i386/kernel/io_apic.c.orig Tue Feb 5 13:10:37 2002 +++ linux/arch/i386/kernel/io_apic.c Tue Feb 5 13:15:23 2002 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -163,6 +164,86 @@ clear_IO_APIC_pin(apic, pin); } +static void set_ioapic_affinity (unsigned int irq, unsigned long mask) +{ + unsigned long flags; + + /* + * Only the first 8 bits are valid. 
+ */ + mask = mask << 24; + spin_lock_irqsave(&ioapic_lock, flags); + __DO_ACTION(1, = mask, ) + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +#if CONFIG_SMP + +typedef struct { + unsigned int cpu; + unsigned long timestamp; +} ____cacheline_aligned irq_balance_t; + +static irq_balance_t irq_balance[NR_IRQS] __cacheline_aligned + = { [ 0 ... NR_IRQS-1 ] = { 1, 0 } }; + +extern unsigned long irq_affinity [NR_IRQS]; + +#endif + +#define IDLE_ENOUGH(cpu,now) \ + (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1)) + +#define IRQ_ALLOWED(cpu,allowed_mask) \ + ((1 << cpu) & (allowed_mask)) + +static unsigned long move(int curr_cpu, unsigned long allowed_mask, unsigned long now, int direction) +{ + int search_idle = 1; + int cpu = curr_cpu; + + goto inside; + + do { + if (unlikely(cpu == curr_cpu)) + search_idle = 0; +inside: + if (direction == 1) { + cpu++; + if (cpu >= smp_num_cpus) + cpu = 0; + } else { + cpu--; + if (cpu == -1) + cpu = smp_num_cpus-1; + } + } while (!IRQ_ALLOWED(cpu,allowed_mask) || + (search_idle && !IDLE_ENOUGH(cpu,now))); + + return cpu; +} + +static inline void balance_irq(int irq) +{ +#if CONFIG_SMP + irq_balance_t *entry = irq_balance + irq; + unsigned long now = jiffies; + + if (unlikely(entry->timestamp != now)) { + unsigned long allowed_mask; + int random_number; + + rdtscl(random_number); + random_number &= 1; + + allowed_mask = cpu_online_map & irq_affinity[irq]; + entry->timestamp = now; + entry->cpu = move(entry->cpu, allowed_mask, now, random_number); + set_ioapic_affinity(irq, 1 << entry->cpu); + } +#endif +} + /* * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to * specific CPU-side IRQs. @@ -653,8 +734,7 @@ } /* - * Set up the 8259A-master output pin as broadcast to all - * CPUs. 
+ * Set up the 8259A-master output pin: */ void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) { @@ -1174,6 +1254,7 @@ */ static void ack_edge_ioapic_irq(unsigned int irq) { + balance_irq(irq); if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) mask_IO_APIC_irq(irq); @@ -1213,6 +1294,7 @@ unsigned long v; int i; + balance_irq(irq); /* * It appears there is an erratum which affects at least version 0x11 * of I/O APIC (that's the 82093AA and cores integrated into various @@ -1268,19 +1350,6 @@ } static void mask_and_ack_level_ioapic_irq (unsigned int irq) { /* nothing */ } - -static void set_ioapic_affinity (unsigned int irq, unsigned long mask) -{ - unsigned long flags; - /* - * Only the first 8 bits are valid. - */ - mask = mask << 24; - - spin_lock_irqsave(&ioapic_lock, flags); - __DO_ACTION(1, = mask, ) - spin_unlock_irqrestore(&ioapic_lock, flags); -} /* * Level and edge triggered IO-APIC interrupts need different handling, --- linux/arch/i386/kernel/irq.c.orig Tue Feb 5 13:10:34 2002 +++ linux/arch/i386/kernel/irq.c Tue Feb 5 13:11:15 2002 @@ -1076,7 +1076,7 @@ static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; -static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL }; +unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL }; static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { -- Mark Hounschell dmarkh@cfl.rr.com