public inbox for linux-smp@vger.kernel.org
 help / color / mirror / Atom feed
From: Mark Hounschell <dmarkh@cfl.rr.com>
To: "Schmitz, Dave R" <SchmiDR@LOUISVILLE.STORTEK.COM>
Cc: "'linux-smp@vger.kernel.org'" <linux-smp@vger.kernel.org>
Subject: Re: WARNING: unexpected IO-APIC; all interrupts to one CPU on smp
Date: Thu, 23 May 2002 12:33:55 -0400	[thread overview]
Message-ID: <3CED19F3.429EC4EA@cfl.rr.com> (raw)
In-Reply-To: BEB8A0E46889D31188CB002048406D70064551FC@lsv-msg03.stortek.com

"Schmitz, Dave R" wrote:
> 
> Hello,
> 
> I received the following boot message on several Supermicro motherboards
> (P4DC6+, P4DCE+, P4DP6) which have among them two Intel chipsets
> (860, E7500) when using both RedHat 7.1 and RedHat 7.2.
> 
> <<<
>  WARNING: unexpected IO-APIC, please mail
>           to linux-smp@vger.kernel.org
> >>>
> 
> Results for RedHat 7.2 are attached for four of the PCs.
> 
> Would this be related to the observation that on all machines
> all interrupts go to CPU0 and none go to CPU1?
> (We do a "cat /proc/interrupts" to see this.)
> My colleagues using dual-CPU Tyan motherboards (Athlon?)
> do not see this, and for slightly slower machines they
> get slightly faster performance.
> 
> Five dmesg logs for four machines are attached.
> All four have:
>         dual Pentium IV Xeon 1.8GHz processors
>         Supermicro motherboards
>         RedHat Linux 7.2 with the included 2.4.7-10smp kernel.
> except as noted below.

I have a number of these motherboards with 1.7GHz cpus in them. They all have the
"all interrupts in cpu0" problem but there is a patch that I tried with success for
this. It is an irq_balance patch. Don't remember where it came from. Only thing with
it was, once you apply it you can no longer bind an irq to a particular processor if
you want to. I had to back it out because my app and the pci card we are developing
require me to bind a process/threads and the irq of the pci card to 1 processor and
force all other processes and all other irqs to all other cpus. It's an emulation of an
old proprietary Real-Time OS and the card is a pci card with 6 high-res timers and 8
external interrupts. The only way to get good interrupt latency/determinism numbers is
to do the above. With this patch I could no longer set the irqs to the processor I wanted. When not running the "app" I wanted irq balancing across the cpus. It worked ok for that.
The patch is below and applies to 2.4.18.

Your other problem I don't see at all, and the MPS is even set to 1.4 in the BIOS on all my boards.
You should probably try setting it to 1.1 and see if it helps.


--- linux/kernel/sched.c.orig   Tue Feb  5 13:11:35 2002
+++ linux/kernel/sched.c        Tue Feb  5 13:12:48 2002
@@ -118,6 +118,11 @@
 #define can_schedule(p,cpu) \
        ((p)->cpus_runnable & (p)->cpus_allowed & (1 << cpu))

+int idle_cpu(int cpu)
+{
+       return cpu_curr(cpu) == idle_task(cpu);
+}
+
 #else

 #define idle_task(cpu) (&init_task)
--- linux/include/linux/sched.h.orig    Tue Feb  5 13:13:09 2002
+++ linux/include/linux/sched.h Tue Feb  5 13:14:00 2002
@@ -144,6 +144,7 @@

 extern void sched_init(void);
 extern void init_idle(void);
+extern int idle_cpu(int cpu);
 extern void show_state(void);
 extern void cpu_init (void);
 extern void trap_init(void);
--- linux/include/asm-i386/hardirq.h.orig       Tue Feb  5 13:10:39 2002
+++ linux/include/asm-i386/hardirq.h    Tue Feb  5 13:14:00 2002
@@ -12,6 +12,7 @@
        unsigned int __local_bh_count;
        unsigned int __syscall_count;
        struct task_struct * __ksoftirqd_task; /* waitqueue is too large */
+       unsigned long idle_timestamp;
        unsigned int __nmi_count;       /* arch dependent */
 } ____cacheline_aligned irq_cpustat_t;

--- linux/arch/i386/kernel/io_apic.c.orig       Tue Feb  5 13:10:37 2002
+++ linux/arch/i386/kernel/io_apic.c    Tue Feb  5 13:15:23 2002
@@ -28,6 +28,7 @@
 #include <linux/config.h>
 #include <linux/smp_lock.h>
 #include <linux/mc146818rtc.h>
+#include <linux/compiler.h>

 #include <asm/io.h>
 #include <asm/smp.h>
@@ -163,6 +164,86 @@
                        clear_IO_APIC_pin(apic, pin);
 }

+static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
+{
+       unsigned long flags;
+
+       /*
+        * Only the first 8 bits are valid.
+        */
+       mask = mask << 24;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __DO_ACTION(1, = mask, )
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#if CONFIG_SMP
+
+typedef struct {
+       unsigned int cpu;
+       unsigned long timestamp;
+} ____cacheline_aligned irq_balance_t;
+
+static irq_balance_t irq_balance[NR_IRQS] __cacheline_aligned
+                       = { [ 0 ... NR_IRQS-1 ] = { 1, 0 } };
+
+extern unsigned long irq_affinity [NR_IRQS];
+
+#endif
+
+#define IDLE_ENOUGH(cpu,now) \
+               (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
+
+#define IRQ_ALLOWED(cpu,allowed_mask) \
+               ((1 << cpu) & (allowed_mask))
+
+static unsigned long move(int curr_cpu, unsigned long allowed_mask, unsigned long now, int direction)
+{
+       int search_idle = 1;
+       int cpu = curr_cpu;
+
+       goto inside;
+
+       do {
+               if (unlikely(cpu == curr_cpu))
+                       search_idle = 0;
+inside:
+               if (direction == 1) {
+                       cpu++;
+                       if (cpu >= smp_num_cpus)
+                               cpu = 0;
+               } else {
+                       cpu--;
+                       if (cpu == -1)
+                               cpu = smp_num_cpus-1;
+               }
+       } while (!IRQ_ALLOWED(cpu,allowed_mask) ||
+                       (search_idle && !IDLE_ENOUGH(cpu,now)));
+
+       return cpu;
+}
+
+static inline void balance_irq(int irq)
+{
+#if CONFIG_SMP
+       irq_balance_t *entry = irq_balance + irq;
+       unsigned long now = jiffies;
+
+       if (unlikely(entry->timestamp != now)) {
+               unsigned long allowed_mask;
+               int random_number;
+
+               rdtscl(random_number);
+               random_number &= 1;
+
+               allowed_mask = cpu_online_map & irq_affinity[irq];
+               entry->timestamp = now;
+               entry->cpu = move(entry->cpu, allowed_mask, now, random_number);
+               set_ioapic_affinity(irq, 1 << entry->cpu);
+       }
+#endif
+}
+
 /*
  * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
  * specific CPU-side IRQs.
@@ -653,8 +734,7 @@
 }

 /*
- * Set up the 8259A-master output pin as broadcast to all
- * CPUs.
+ * Set up the 8259A-master output pin:
  */
 void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
 {
@@ -1174,6 +1254,7 @@
  */
 static void ack_edge_ioapic_irq(unsigned int irq)
 {
+       balance_irq(irq);
        if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
                                        == (IRQ_PENDING | IRQ_DISABLED))
                mask_IO_APIC_irq(irq);
@@ -1213,6 +1294,7 @@
        unsigned long v;
        int i;

+       balance_irq(irq);
 /*
  * It appears there is an erratum which affects at least version 0x11
  * of I/O APIC (that's the 82093AA and cores integrated into various
@@ -1268,19 +1350,6 @@
 }

 static void mask_and_ack_level_ioapic_irq (unsigned int irq) { /* nothing */ }
-
-static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
-{
-       unsigned long flags;
-       /*
-        * Only the first 8 bits are valid.
-        */
-       mask = mask << 24;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       __DO_ACTION(1, = mask, )
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}

 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
--- linux/arch/i386/kernel/irq.c.orig   Tue Feb  5 13:10:34 2002
+++ linux/arch/i386/kernel/irq.c        Tue Feb  5 13:11:15 2002
@@ -1076,7 +1076,7 @@

 static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];

-static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL };
+unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL };
 static int irq_affinity_read_proc (char *page, char **start, off_t off,
                        int count, int *eof, void *data)
 {


-- 
Mark Hounschell
dmarkh@cfl.rr.com

  reply	other threads:[~2002-05-23 16:33 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-05-23  0:24 WARNING: unexpected IO-APIC; all interrupts to one CPU on smp Schmitz, Dave R
2002-05-23 16:33 ` Mark Hounschell [this message]
     [not found] <BEB8A0E46889D31188CB002048406D70064551FC@lsv-msg03.stortek .com>
2002-05-23  7:49 ` Earle Nietzel
2002-05-23 15:06   ` Randy.Dunlap
2002-06-18 14:10   ` E. Robert Bogusta

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3CED19F3.429EC4EA@cfl.rr.com \
    --to=dmarkh@cfl.rr.com \
    --cc=SchmiDR@LOUISVILLE.STORTEK.COM \
    --cc=linux-smp@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox