* [patch] Support multiple CPUs going through OS_MCA
@ 2007-04-27 21:29 Russ Anderson
2007-04-28 0:34 ` Yu, Fenghua
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Russ Anderson @ 2007-04-27 21:29 UTC (permalink / raw)
To: linux-ia64
[patch] Support multiple CPUs going through OS_MCA
Linux does not gracefully deal with multiple processors going
through OS_MCA as part of the same MCA event.  The first cpu
into OS_MCA grabs the ia64_mca_serialize lock. Subsequent
cpus wait for that lock, preventing them from reporting in as
rendezvoused. The first cpu waits 5 seconds then complains
that all the cpus have not rendezvoused. The first cpu then
handles its MCA and frees up all the rendezvoused cpus and
releases the ia64_mca_serialize lock. One of the subsequent
cpus going through OS_MCA then gets the ia64_mca_serialize
lock, waits another 5 seconds and then complains that none of
the other cpus have rendezvoused.
This patch allows multiple CPUs to gracefully go through OS_MCA.
The first CPU into ia64_mca_handler() grabs a mca_count lock.
Subsequent CPUs into ia64_mca_handler() are added to a list of cpus
that need to go through OS_MCA (a bit set in mca_cpu), and report
in as rendezvoused, but spin waiting their turn.
The first CPU sees everyone rendezvous, handles his MCA, wakes up
one of the other CPUs waiting to process their MCA (by clearing
one mca_cpu bit), and then waits for the other cpus to complete
their MCA handling. The next CPU handles his MCA and the process
repeats until all the CPUs have handled their MCA. When the last
CPU has handled its MCA, it sets monarch_cpu to -1, releasing all
the CPUs.
In testing this works more reliably and faster.
Thanks to Keith Owens for suggesting numerous improvements
to this code.
Signed-off-by: Russ Anderson <rja@sgi.com>
---
arch/ia64/kernel/mca.c | 61 +++++++++++++++++++++++++++++++++++++++------
arch/ia64/kernel/mca_asm.S | 12 --------
include/asm-ia64/mca.h | 1
3 files changed, 55 insertions(+), 19 deletions(-)
Index: test/arch/ia64/kernel/mca.c
=================================--- test.orig/arch/ia64/kernel/mca.c 2007-04-13 13:28:52.822883294 -0500
+++ test/arch/ia64/kernel/mca.c 2007-04-27 10:39:46.880158709 -0500
@@ -57,6 +57,9 @@
*
* 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
* Add printing support for MCA/INIT.
+ *
+ * 2007-04-27 Russ Anderson <rja@sgi.com>
+ * Support multiple cpus going through OS_MCA in the same event.
*/
#include <linux/types.h>
#include <linux/init.h>
@@ -97,7 +100,6 @@
#endif
/* Used by mca_asm.S */
-u32 ia64_mca_serialize;
DEFINE_PER_CPU(u64, ia64_mca_data); /* = __per_cpu_mca[smp_processor_id()] */
DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */
@@ -963,11 +965,12 @@ ia64_mca_modify_original_stack(struct pt
goto no_mod;
}
+ if (r13 != sos->prev_IA64_KR_CURRENT) {
+ msg = "inconsistent previous current and r13";
+ goto no_mod;
+ }
+
if (!mca_recover_range(ms->pmsa_iip)) {
- if (r13 != sos->prev_IA64_KR_CURRENT) {
- msg = "inconsistent previous current and r13";
- goto no_mod;
- }
if ((r12 - r13) >= KERNEL_STACK_SIZE) {
msg = "inconsistent r12 and r13";
goto no_mod;
@@ -1187,6 +1190,13 @@ all_in:
* further MCA logging is enabled by clearing logs.
* Monarch also has the duty of sending wakeup-IPIs to pull the
* slave processors out of rendezvous spinloop.
+ *
+ * If multiple processors call into OS_MCA, the first will become
+ * the monarch. Subsequent cpus will be recorded in the mca_cpu
+ * bitmask. After the first monarch has processed its MCA, it
+ * will wake up the next cpu in the mca_cpu bitmask and then go
+ * into the rendezvous loop. When all processors have serviced
+ * their MCA, the last monarch frees up the rest of the processors.
*/
void
ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
@@ -1196,16 +1206,32 @@ ia64_mca_handler(struct pt_regs *regs, s
struct task_struct *previous_current;
struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu };
+ static atomic_t mca_count;
+ static cpumask_t mca_cpu;
+ if (atomic_add_return(1, &mca_count) == 1) {
+ monarch_cpu = cpu;
+ sos->monarch = 1;
+ } else {
+ cpu_set(cpu, mca_cpu);
+ sos->monarch = 0;
+ }
mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d "
"monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch);
previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
- monarch_cpu = cpu;
+
if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, (long)&nd, 0, 0)
== NOTIFY_STOP)
ia64_mca_spin(__FUNCTION__);
- ia64_wait_for_slaves(cpu, "MCA");
+ if (sos->monarch) {
+ ia64_wait_for_slaves(cpu, "MCA");
+ } else {
+ ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA;
+ while (cpu_isset(cpu, mca_cpu))
+ cpu_relax(); /* spin until monarch wakes us */
+ ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+ }
/* Wakeup all the processors which are spinning in the rendezvous loop.
* They will leave SAL, then spin in the OS with interrupts disabled
@@ -1232,6 +1258,7 @@ ia64_mca_handler(struct pt_regs *regs, s
rh->severity = sal_log_severity_corrected;
ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
sos->os_status = IA64_MCA_CORRECTED;
+ ia64_mlogbuf_dump();
} else {
/* Dump buffered message to console */
ia64_mlogbuf_finish(1);
@@ -1244,6 +1271,26 @@ ia64_mca_handler(struct pt_regs *regs, s
== NOTIFY_STOP)
ia64_mca_spin(__FUNCTION__);
+
+ if (atomic_dec_return(&mca_count) > 0) {
+ int i;
+
+ /* wake up the next monarch cpu,
+ * and put this cpu in the rendez loop.
+ */
+ ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA;
+ for_each_online_cpu(i) {
+ if (cpu_isset(i, mca_cpu)) {
+ monarch_cpu = i;
+ cpu_clear(i, mca_cpu); /* wake next cpu */
+ while (monarch_cpu != -1)
+ cpu_relax(); /* spin until last cpu leaves */
+ ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+ set_curr_task(cpu, previous_current);
+ return;
+ }
+ }
+ }
set_curr_task(cpu, previous_current);
monarch_cpu = -1;
}
Index: test/arch/ia64/kernel/mca_asm.S
=================================--- test.orig/arch/ia64/kernel/mca_asm.S 2007-04-13 13:28:52.830884267 -0500
+++ test/arch/ia64/kernel/mca_asm.S 2007-04-13 18:05:19.565878142 -0500
@@ -141,14 +141,6 @@ ia64_do_tlb_purge:
//StartMain////////////////////////////////////////////////////////////////////
ia64_os_mca_dispatch:
- // Serialize all MCA processing
- mov r3=1;;
- LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);;
-ia64_os_mca_spin:
- xchg4 r4=[r2],r3;;
- cmp.ne p6,p0=r4,r0
-(p6) br ia64_os_mca_spin
-
mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack
LOAD_PHYSICAL(p0,r2,1f) // return address
mov r19=1 // All MCA events are treated as monarch (for now)
@@ -314,10 +306,6 @@ END(ia64_os_mca_virtual_begin)
1:
mov b0=r12 // SAL_CHECK return address
-
- // release lock
- LOAD_PHYSICAL(p0,r3,ia64_mca_serialize);;
- st4.rel [r3]=r0
br b0
Index: test/include/asm-ia64/mca.h
=================================--- test.orig/include/asm-ia64/mca.h 2007-04-09 13:24:38.000000000 -0500
+++ test/include/asm-ia64/mca.h 2007-04-13 17:54:55.935180331 -0500
@@ -48,6 +48,7 @@ enum {
IA64_MCA_RENDEZ_CHECKIN_NOTDONE = 0x0,
IA64_MCA_RENDEZ_CHECKIN_DONE = 0x1,
IA64_MCA_RENDEZ_CHECKIN_INIT = 0x2,
+ IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA = 0x3,
};
/* Information maintained by the MC infrastructure */
--
Russ Anderson, OS RAS/Partitioning Project Lead
SGI - Silicon Graphics Inc rja@sgi.com
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [patch] Support multiple CPUs going through OS_MCA
2007-04-27 21:29 [patch] Support multiple CPUs going through OS_MCA Russ Anderson
@ 2007-04-28 0:34 ` Yu, Fenghua
2007-04-30 23:07 ` Russ Anderson
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Yu, Fenghua @ 2007-04-28 0:34 UTC (permalink / raw)
To: linux-ia64
>+ if (r13 != sos->prev_IA64_KR_CURRENT) {
>+ msg = "inconsistent previous current and r13";
>+ goto no_mod;
>+ }
>+
> if (!mca_recover_range(ms->pmsa_iip)) {
>- if (r13 != sos->prev_IA64_KR_CURRENT) {
>- msg = "inconsistent previous current and r13";
>- goto no_mod;
>- }
Could you explain why you moved the r13 check out of mca_recover_range()?
>+ for_each_online_cpu(i) {
>+ if (cpu_isset(i, mca_cpu)) {
>+ monarch_cpu = i;
>+ cpu_clear(i, mca_cpu); /* wake next cpu
*/
Just a picky comment...Is it better to changed to
+ if (mca_cpu!=0) {
+ for_each_online_cpu(i) {
+ if (cpu_isset(i, mca_cpu)) {
+ monarch_cpu = i;
+ cpu_clear(i, mca_cpu); /* wake next cpu
*/
it may speed up a bit?. After all in reality, there are few bits set in
mca_cpu. So there is no need to go through all of online cpus.
Thanks.
-Fenghua
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [patch] Support multiple CPUs going through OS_MCA
2007-04-27 21:29 [patch] Support multiple CPUs going through OS_MCA Russ Anderson
2007-04-28 0:34 ` Yu, Fenghua
@ 2007-04-30 23:07 ` Russ Anderson
2007-05-07 1:04 ` Hidetoshi Seto
2007-05-18 22:17 ` Russ Anderson
3 siblings, 0 replies; 5+ messages in thread
From: Russ Anderson @ 2007-04-30 23:07 UTC (permalink / raw)
To: linux-ia64
Yu, Fenghua wrote:
>
> >+ if (r13 != sos->prev_IA64_KR_CURRENT) {
> >+ msg = "inconsistent previous current and r13";
> >+ goto no_mod;
> >+ }
> >+
> > if (!mca_recover_range(ms->pmsa_iip)) {
> >- if (r13 != sos->prev_IA64_KR_CURRENT) {
> >- msg = "inconsistent previous current and r13";
> >- goto no_mod;
> >- }
>
> Could you explain whey move the r13 check out of mca_recover_range()?
For my test cases, I can hit an MCA without that change (output
below) if the MCA surfaces in the interrupt IVT (address in mca_recover_range()).
The MCA is due to old_bspstore not having a valid virtual address.
--------------------------------------------------------------------------
run test 163
cpu 0, MCA occurred in user space, original stack not modified
Unable to handle kernel paging request at virtual address 603fffffff850048
MCA 4179[0]: Oops 8804682956800 [1]
Modules linked in: errinj
Pid: 0, CPU 1, comm: MCA 4179
psr : 0000101808022030 ifs : 800000000000122c ip : [<a000000100044a10>] Not tainted
ip is at ia64_mca_modify_original_stack+0x1110/0x1240
unat: 0000000000000000 pfs : 000000000000122c rsc : 0000000000000003
rnat: 0000000000000000 bsps: 0000000000000000 pr : 000000560055a9a7
ldrs: 0000000000000000 ccv : 0000000000000000 fpsr: 0009804c8a70033f
csd : 0000000000000000 ssd : 0000000000000000
b0 : a0000001000449a0 b6 : 4000000000003c40 b7 : a000000000010640
f6 : 000000000000000000000 f7 : 0ffdba200000000000000
f8 : 100018000000000000000 f9 : 10002a000000000000000
f10 : 0fffdccccccccc8c00000 f11 : 1003e0000000000000000
r1 : a000000100f69a00 r2 : 607fffffff84ae58 r3 : 0000000000550281
r8 : 0000000000000000 r9 : 607fffffff84ae40 r10 : 0000000000000000
r11 : 0000000000000000 r12 : e000006007067ac0 r13 : e000006007060000
r14 : 0000000000000001 r15 : e000006007060ce8 r16 : 0000000000000005
r17 : 0000000000000000 r18 : 0000000000000000 r19 : 0000000000000000
r20 : 0000000000000000 r21 : 0000000000000000 r22 : 8000000000000000
r23 : 0000000000000000 r24 : 000000000000003e r25 : 000000000000003f
r26 : 0000000000000009 r27 : 0000000000000000 r28 : 4000000000000000
r29 : 0000000000000000 r30 : 0000000000000000 r31 : c0000000000111c8
Call Trace:
[<a0000001000125e0>] show_stack+0x40/0xa0
sp=e000006007067670 bsp=e000006007061088
[<a000000100012ee0>] show_regs+0x840/0x880
sp=e000006007067840 bsp=e000006007061030
[<a000000100034910>] die+0x250/0x320
sp=e000006007067840 bsp=e000006007060fe0
[<a0000001000592f0>] ia64_do_page_fault+0x930/0xa60
sp=e000006007067860 bsp=e000006007060f90
[<a00000010000b520>] ia64_leave_kernel+0x0/0x290
sp=e0000060070678f0 bsp=e000006007060f90
[<a000000100044a10>] ia64_mca_modify_original_stack+0x1110/0x1240
sp=e000006007067ac0 bsp=e000006007060e30
[<a000000100045ad0>] ia64_mca_handler+0x170/0xb20
sp=e000006007067ad0 bsp=e000006007060dd0
[<a000000100047420>] ia64_os_mca_virtual_begin+0x40/0x140
sp=e000006007067b80 bsp=e000006007060dd0
Kernel panic - not syncing: Attempted to kill the idle task!
--------------------------------------------------------------------------
> >+ for_each_online_cpu(i) {
> >+ if (cpu_isset(i, mca_cpu)) {
> >+ monarch_cpu = i;
> >+ cpu_clear(i, mca_cpu); /* wake next cpu
> */
>
> Just a picky comment...Is it better to changed to
> + if (mca_cpu!=0) {
> + for_each_online_cpu(i) {
> + if (cpu_isset(i, mca_cpu)) {
> + monarch_cpu = i;
> + cpu_clear(i, mca_cpu); /* wake next cpu
> */
>
> it may speed up a bit?. After all in reality, there are few bits set in
> mca_cpu. So there is no need to go through all of online cpus.
That section of code only gets executed if mca_cpu != 0, due to
this line:
if (atomic_dec_return(&mca_count) > 0) {
If mca_count is greater than 0, there is a bit set.
If mca_count == 0, there are no bits set and the code is skipped.
> Thanks.
>
> -Fenghua
>
Thanks,
--
Russ Anderson, OS RAS/Partitioning Project Lead
SGI - Silicon Graphics Inc rja@sgi.com
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [patch] Support multiple CPUs going through OS_MCA
2007-04-27 21:29 [patch] Support multiple CPUs going through OS_MCA Russ Anderson
2007-04-28 0:34 ` Yu, Fenghua
2007-04-30 23:07 ` Russ Anderson
@ 2007-05-07 1:04 ` Hidetoshi Seto
2007-05-18 22:17 ` Russ Anderson
3 siblings, 0 replies; 5+ messages in thread
From: Hidetoshi Seto @ 2007-05-07 1:04 UTC (permalink / raw)
To: linux-ia64
> @@ -1232,6 +1258,7 @@ ia64_mca_handler(struct pt_regs *regs, s
> rh->severity = sal_log_severity_corrected;
> ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
> sos->os_status = IA64_MCA_CORRECTED;
> + ia64_mlogbuf_dump();
> } else {
> /* Dump buffered message to console */
> ia64_mlogbuf_finish(1);
We cannot call ia64_mlogbuf_dump() from recoverable MCA/INIT
context. Since ia64_mlogbuf_dump() calls printk() which could
cause deadlock in MCA/INIT context, this change may be trouble.
Thanks,
H.Seto
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [patch] Support multiple CPUs going through OS_MCA
2007-04-27 21:29 [patch] Support multiple CPUs going through OS_MCA Russ Anderson
` (2 preceding siblings ...)
2007-05-07 1:04 ` Hidetoshi Seto
@ 2007-05-18 22:17 ` Russ Anderson
3 siblings, 0 replies; 5+ messages in thread
From: Russ Anderson @ 2007-05-18 22:17 UTC (permalink / raw)
To: linux-ia64
Hidetoshi Seto wrote:
>
> > @@ -1232,6 +1258,7 @@ ia64_mca_handler(struct pt_regs *regs, s
> > rh->severity = sal_log_severity_corrected;
> > ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
> > sos->os_status = IA64_MCA_CORRECTED;
> > + ia64_mlogbuf_dump();
> > } else {
> > /* Dump buffered message to console */
> > ia64_mlogbuf_finish(1);
>
> We cannot call ia64_mlogbuf_dump() from recoverable MCA/INIT
> context. Since ia64_mlogbuf_dump() calls printk() which could
> cause deadlock in MCA/INIT context, this change may be trouble.
You are correct. An updated version of the patch without
that line.
-------------------------------------------------------------------
[patch] Support multiple CPUs going through OS_MCA
Linux does not gracefully deal with multiple processors going
through OS_MCA as part of the same MCA event.  The first cpu
into OS_MCA grabs the ia64_mca_serialize lock. Subsequent
cpus wait for that lock, preventing them from reporting in as
rendezvoused. The first cpu waits 5 seconds then complains
that all the cpus have not rendezvoused. The first cpu then
handles its MCA and frees up all the rendezvoused cpus and
releases the ia64_mca_serialize lock. One of the subsequent
cpus going through OS_MCA then gets the ia64_mca_serialize
lock, waits another 5 seconds and then complains that none of
the other cpus have rendezvoused.
This patch allows multiple CPUs to gracefully go through OS_MCA.
The first CPU into ia64_mca_handler() grabs a mca_count lock.
Subsequent CPUs into ia64_mca_handler() are added to a list of cpus
that need to go through OS_MCA (a bit set in mca_cpu), and report
in as rendezvoused, but spin waiting their turn.
The first CPU sees everyone rendezvous, handles his MCA, wakes up
one of the other CPUs waiting to process their MCA (by clearing
one mca_cpu bit), and then waits for the other cpus to complete
their MCA handling. The next CPU handles his MCA and the process
repeats until all the CPUs have handled their MCA. When the last
CPU has handled its MCA, it sets monarch_cpu to -1, releasing all
the CPUs.
In testing this works more reliably and faster.
Thanks to Keith Owens for suggesting numerous improvements
to this code.
Signed-off-by: Russ Anderson <rja@sgi.com>
---
arch/ia64/kernel/mca.c | 60 +++++++++++++++++++++++++++++++++++++++------
arch/ia64/kernel/mca_asm.S | 12 ---------
include/asm-ia64/mca.h | 1
3 files changed, 54 insertions(+), 19 deletions(-)
Index: test/arch/ia64/kernel/mca.c
=================================--- test.orig/arch/ia64/kernel/mca.c 2007-05-18 14:23:54.689494599 -0500
+++ test/arch/ia64/kernel/mca.c 2007-05-18 14:23:57.029784462 -0500
@@ -57,6 +57,9 @@
*
* 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
* Add printing support for MCA/INIT.
+ *
+ * 2007-04-27 Russ Anderson <rja@sgi.com>
+ * Support multiple cpus going through OS_MCA in the same event.
*/
#include <linux/types.h>
#include <linux/init.h>
@@ -96,7 +99,6 @@
#endif
/* Used by mca_asm.S */
-u32 ia64_mca_serialize;
DEFINE_PER_CPU(u64, ia64_mca_data); /* = __per_cpu_mca[smp_processor_id()] */
DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */
@@ -963,11 +965,12 @@ ia64_mca_modify_original_stack(struct pt
goto no_mod;
}
+ if (r13 != sos->prev_IA64_KR_CURRENT) {
+ msg = "inconsistent previous current and r13";
+ goto no_mod;
+ }
+
if (!mca_recover_range(ms->pmsa_iip)) {
- if (r13 != sos->prev_IA64_KR_CURRENT) {
- msg = "inconsistent previous current and r13";
- goto no_mod;
- }
if ((r12 - r13) >= KERNEL_STACK_SIZE) {
msg = "inconsistent r12 and r13";
goto no_mod;
@@ -1187,6 +1190,13 @@ all_in:
* further MCA logging is enabled by clearing logs.
* Monarch also has the duty of sending wakeup-IPIs to pull the
* slave processors out of rendezvous spinloop.
+ *
+ * If multiple processors call into OS_MCA, the first will become
+ * the monarch. Subsequent cpus will be recorded in the mca_cpu
+ * bitmask. After the first monarch has processed its MCA, it
+ * will wake up the next cpu in the mca_cpu bitmask and then go
+ * into the rendezvous loop. When all processors have serviced
+ * their MCA, the last monarch frees up the rest of the processors.
*/
void
ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
@@ -1196,16 +1206,32 @@ ia64_mca_handler(struct pt_regs *regs, s
struct task_struct *previous_current;
struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu };
+ static atomic_t mca_count;
+ static cpumask_t mca_cpu;
+ if (atomic_add_return(1, &mca_count) == 1) {
+ monarch_cpu = cpu;
+ sos->monarch = 1;
+ } else {
+ cpu_set(cpu, mca_cpu);
+ sos->monarch = 0;
+ }
mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d "
"monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch);
previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
- monarch_cpu = cpu;
+
if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, (long)&nd, 0, 0)
== NOTIFY_STOP)
ia64_mca_spin(__FUNCTION__);
- ia64_wait_for_slaves(cpu, "MCA");
+ if (sos->monarch) {
+ ia64_wait_for_slaves(cpu, "MCA");
+ } else {
+ ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA;
+ while (cpu_isset(cpu, mca_cpu))
+ cpu_relax(); /* spin until monarch wakes us */
+ ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+ }
/* Wakeup all the processors which are spinning in the rendezvous loop.
* They will leave SAL, then spin in the OS with interrupts disabled
@@ -1244,6 +1270,26 @@ ia64_mca_handler(struct pt_regs *regs, s
== NOTIFY_STOP)
ia64_mca_spin(__FUNCTION__);
+
+ if (atomic_dec_return(&mca_count) > 0) {
+ int i;
+
+ /* wake up the next monarch cpu,
+ * and put this cpu in the rendez loop.
+ */
+ ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA;
+ for_each_online_cpu(i) {
+ if (cpu_isset(i, mca_cpu)) {
+ monarch_cpu = i;
+ cpu_clear(i, mca_cpu); /* wake next cpu */
+ while (monarch_cpu != -1)
+ cpu_relax(); /* spin until last cpu leaves */
+ ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+ set_curr_task(cpu, previous_current);
+ return;
+ }
+ }
+ }
set_curr_task(cpu, previous_current);
monarch_cpu = -1;
}
Index: test/arch/ia64/kernel/mca_asm.S
=================================--- test.orig/arch/ia64/kernel/mca_asm.S 2007-05-18 14:23:27.210090966 -0500
+++ test/arch/ia64/kernel/mca_asm.S 2007-05-18 14:23:57.037785453 -0500
@@ -133,14 +133,6 @@ ia64_do_tlb_purge:
//StartMain////////////////////////////////////////////////////////////////////
ia64_os_mca_dispatch:
- // Serialize all MCA processing
- mov r3=1;;
- LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);;
-ia64_os_mca_spin:
- xchg4 r4=[r2],r3;;
- cmp.ne p6,p0=r4,r0
-(p6) br ia64_os_mca_spin
-
mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack
LOAD_PHYSICAL(p0,r2,1f) // return address
mov r19=1 // All MCA events are treated as monarch (for now)
@@ -290,10 +282,6 @@ END(ia64_os_mca_virtual_begin)
1:
mov b0=r12 // SAL_CHECK return address
-
- // release lock
- LOAD_PHYSICAL(p0,r3,ia64_mca_serialize);;
- st4.rel [r3]=r0
br b0
Index: test/include/asm-ia64/mca.h
=================================--- test.orig/include/asm-ia64/mca.h 2007-05-18 14:23:27.210090966 -0500
+++ test/include/asm-ia64/mca.h 2007-05-18 14:23:57.057787931 -0500
@@ -48,6 +48,7 @@ enum {
IA64_MCA_RENDEZ_CHECKIN_NOTDONE = 0x0,
IA64_MCA_RENDEZ_CHECKIN_DONE = 0x1,
IA64_MCA_RENDEZ_CHECKIN_INIT = 0x2,
+ IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA = 0x3,
};
/* Information maintained by the MC infrastructure */
--
Russ Anderson, OS RAS/Partitioning Project Lead
SGI - Silicon Graphics Inc rja@sgi.com
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2007-05-18 22:17 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-04-27 21:29 [patch] Support multiple CPUs going through OS_MCA Russ Anderson
2007-04-28 0:34 ` Yu, Fenghua
2007-04-30 23:07 ` Russ Anderson
2007-05-07 1:04 ` Hidetoshi Seto
2007-05-18 22:17 ` Russ Anderson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox