* [Patch] avoid deadlock during console output
@ 2009-03-06 8:46 Juergen Gross
2009-03-06 9:11 ` Keir Fraser
0 siblings, 1 reply; 5+ messages in thread
From: Juergen Gross @ 2009-03-06 8:46 UTC (permalink / raw)
To: xen-devel@lists.xensource.com
[-- Attachment #1: Type: text/plain, Size: 1080 bytes --]
Hi,
during my test for cpupools I've found an issue in console output.
Sometimes the hypervisor hangs up due to a deadlock if something is printed
to the console via printk if a per-cpu scheduler lock is held by the printing
processor. Inside printk an event is sent to dom0 which in some cases leads to
a call of vcpu_wake resulting in the deadlock.
This problem occurs when calling BUG during holding the lock, too.
This issue is easily reproducable on a system with multiple cpus under low
load by calling
xm debug-keys r
to dump the schedulers run-queues. On my 4-core machine I need only about 5
calls to stop the machine.
The attached patch solves the problem by avoiding sending the event in
critical paths.
Juergen
--
Juergen Gross Principal Developer
IP SW OS6 Telephone: +49 (0) 89 636 47950
Fujitsu Siemens Computers e-mail: juergen.gross@fujitsu-siemens.com
Otto-Hahn-Ring 6 Internet: www.fujitsu-siemens.com
D-81739 Muenchen Company details: www.fujitsu-siemens.com/imprint.html
[-- Attachment #2: consout.patch --]
[-- Type: text/x-patch, Size: 6599 bytes --]
Signed-off-by: juergen.gross@fujitsu-siemens.com
# HG changeset patch
# User juergen.gross@fujitsu-siemens.com
# Date 1236328387 -3600
# Node ID 0a7f637315e43205425da88aff3899c8e1ff6d11
# Parent 6315b66fbd5b25597ad2aa766aeda68d6852205d
avoid deadlocks in console output
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/arch/x86/traps.c Fri Mar 06 09:33:07 2009 +0100
@@ -389,6 +389,7 @@
{
watchdog_disable();
console_start_sync();
+ console_enter_critical();
show_execution_state(regs);
@@ -398,6 +399,7 @@
printk("Faulting linear address: %p\n", _p(cr2));
show_page_walk(cr2);
}
+ console_exit_critical();
}
panic("FATAL TRAP: vector = %d (%s)\n"
@@ -545,7 +547,9 @@
DEBUGGER_trap_fatal(trapnr, regs);
+ console_enter_critical();
show_execution_state(regs);
+ console_exit_critical();
panic("FATAL TRAP: vector = %d (%s)\n"
"[error_code=%04x]\n",
trapnr, trapstr(trapnr), regs->error_code);
@@ -866,7 +870,9 @@
if ( id == BUGFRAME_dump )
{
+ console_enter_critical();
show_execution_state(regs);
+ console_exit_critical();
regs->eip = (unsigned long)eip;
return;
}
@@ -883,17 +889,21 @@
if ( id == BUGFRAME_warn )
{
+ console_enter_critical();
printk("Xen WARN at %.50s:%d\n", filename, lineno);
show_execution_state(regs);
+ console_exit_critical();
regs->eip = (unsigned long)eip;
return;
}
if ( id == BUGFRAME_bug )
{
+ console_enter_critical();
printk("Xen BUG at %.50s:%d\n", filename, lineno);
DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
show_execution_state(regs);
+ console_exit_critical();
panic("Xen BUG at %.50s:%d\n", filename, lineno);
}
@@ -906,10 +916,12 @@
eip += sizeof(bug_str);
predicate = is_kernel(bug_str.str) ? (char *)bug_str.str : "<unknown>";
+ console_enter_critical();
printk("Assertion '%s' failed at %.50s:%d\n",
predicate, filename, lineno);
DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
show_execution_state(regs);
+ console_exit_critical();
panic("Assertion '%s' failed at %.50s:%d\n",
predicate, filename, lineno);
@@ -920,7 +932,9 @@
return;
}
DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
+ console_enter_critical();
show_execution_state(regs);
+ console_exit_critical();
panic("FATAL TRAP: vector = %d (invalid opcode)\n", TRAP_invalid_op);
}
@@ -945,10 +959,12 @@
static void reserved_bit_page_fault(
unsigned long addr, struct cpu_user_regs *regs)
{
+ console_enter_critical();
printk("d%d:v%d: reserved bit in page table (ec=%04X)\n",
current->domain->domain_id, current->vcpu_id, regs->error_code);
show_page_walk(addr);
show_execution_state(regs);
+ console_exit_critical();
}
void propagate_page_fault(unsigned long addr, u16 error_code)
@@ -1247,8 +1263,10 @@
DEBUGGER_trap_fatal(TRAP_page_fault, regs);
+ console_enter_critical();
show_execution_state(regs);
show_page_walk(addr);
+ console_exit_critical();
panic("FATAL PAGE FAULT\n"
"[error_code=%04x]\n"
"Faulting linear address: %p\n",
@@ -2757,7 +2775,9 @@
DEBUGGER_trap_fatal(TRAP_gp_fault, regs);
hardware_gp:
+ console_enter_critical();
show_execution_state(regs);
+ console_exit_critical();
panic("GENERAL PROTECTION FAULT\n[error_code=%04x]\n", regs->error_code);
}
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/common/schedule.c
--- a/xen/common/schedule.c Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/common/schedule.c Fri Mar 06 09:33:07 2009 +0100
@@ -930,10 +930,12 @@
for_each_online_cpu ( i )
{
+ console_enter_critical();
spin_lock(&per_cpu(schedule_data, i).schedule_lock);
printk("CPU[%02d] ", i);
SCHED_OP(dump_cpu_state, i);
spin_unlock(&per_cpu(schedule_data, i).schedule_lock);
+ console_exit_critical();
}
local_irq_restore(flags);
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/drivers/char/console.c Fri Mar 06 09:33:07 2009 +0100
@@ -414,6 +414,22 @@
* *****************************************************
*/
+/* don't try to wake up dom0 if schedule lock might be held, as this could
+ result in a deadlock! */
+
+static atomic_t console_crit_cnt = ATOMIC_INIT(0);
+
+void console_enter_critical(void)
+{
+ atomic_inc(&console_crit_cnt);
+}
+
+void console_exit_critical(void)
+{
+ BUG_ON(atomic_read(&console_crit_cnt) == 0);
+ atomic_dec(&console_crit_cnt);
+}
+
static void __putstr(const char *str)
{
int c;
@@ -426,7 +442,8 @@
while ( (c = *str++) != '\0' )
putchar_console_ring(c);
- send_guest_global_virq(dom0, VIRQ_CON_RING);
+ if (atomic_read(&console_crit_cnt) == 0)
+ send_guest_global_virq(dom0, VIRQ_CON_RING);
}
static int printk_prefix_check(char *p, char **pp)
@@ -915,6 +932,7 @@
static DEFINE_SPINLOCK(lock);
static char buf[128];
+ console_enter_critical();
debugtrace_dump();
/* Protects buf[] and ensure multi-line message prints atomically. */
@@ -935,6 +953,7 @@
printk("Reboot in five seconds...\n");
spin_unlock_irqrestore(&lock, flags);
+ console_exit_critical();
debugger_trap_immediate();
@@ -953,17 +972,21 @@
void __bug(char *file, int line)
{
+ console_enter_critical();
console_start_sync();
printk("Xen BUG at %s:%d\n", file, line);
dump_execution_state();
+ console_exit_critical();
panic("Xen BUG at %s:%d\n", file, line);
for ( ; ; ) ;
}
void __warn(char *file, int line)
{
+ console_enter_critical();
printk("Xen WARN at %s:%d\n", file, line);
dump_execution_state();
+ console_exit_critical();
}
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/include/xen/lib.h
--- a/xen/include/xen/lib.h Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/include/xen/lib.h Fri Mar 06 09:33:07 2009 +0100
@@ -100,4 +100,8 @@
extern char *print_tainted(char *str);
extern void add_taint(unsigned);
+/* avoid scheduling during console output in critical paths */
+void console_enter_critical(void);
+void console_exit_critical(void);
+
#endif /* __LIB_H__ */
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 5+ messages in thread* Re: [Patch] avoid deadlock during console output
2009-03-06 8:46 [Patch] avoid deadlock during console output Juergen Gross
@ 2009-03-06 9:11 ` Keir Fraser
2009-03-09 6:18 ` Juergen Gross
0 siblings, 1 reply; 5+ messages in thread
From: Keir Fraser @ 2009-03-06 9:11 UTC (permalink / raw)
To: Juergen Gross, xen-devel@lists.xensource.com
On 06/03/2009 08:46, "Juergen Gross" <juergen.gross@fujitsu-siemens.com>
wrote:
> to dump the schedulers run-queues. On my 4-core machine I need only about 5
> calls to stop the machine.
>
> The attached patch solves the problem by avoiding sending the event in
> critical paths.
Ugly. Instead we can defer the dom0 notification to a tasklet. I'll make a
patch for that myself.
Thanks,
Keir
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Patch] avoid deadlock during console output
2009-03-06 9:11 ` Keir Fraser
@ 2009-03-09 6:18 ` Juergen Gross
2009-03-09 7:45 ` Juergen Gross
2009-03-09 8:28 ` Keir Fraser
0 siblings, 2 replies; 5+ messages in thread
From: Juergen Gross @ 2009-03-09 6:18 UTC (permalink / raw)
To: Keir Fraser; +Cc: xen-devel@lists.xensource.com
Keir Fraser wrote:
> On 06/03/2009 08:46, "Juergen Gross" <juergen.gross@fujitsu-siemens.com>
> wrote:
>
>> to dump the schedulers run-queues. On my 4-core machine I need only about 5
>> calls to stop the machine.
>>
>> The attached patch solves the problem by avoiding sending the event in
>> critical paths.
>
> Ugly. Instead we can defer the dom0 notification to a tasklet. I'll make a
> patch for that myself.
Hmm, do you think your patch is okay?
tasklet_schedule is taking another lock and uses BUG_ON then...
I would suggest to modify tasklet_schedule:
if ( !t->is_scheduled && !t->is_running )
{
if (!list_empty(&t->list))
{
spin_unlock_irqrestore(&tasklet_lock, flags);
BUG();
}
list_add_tail(&t->list, &tasklet_list);
}
Juergen
--
Juergen Gross Principal Developer
IP SW OS6 Telephone: +49 (0) 89 636 47950
Fujitsu Siemens Computers e-mail: juergen.gross@fujitsu-siemens.com
Otto-Hahn-Ring 6 Internet: www.fujitsu-siemens.com
D-81739 Muenchen Company details: www.fujitsu-siemens.com/imprint.html
^ permalink raw reply [flat|nested] 5+ messages in thread* Re: [Patch] avoid deadlock during console output
2009-03-09 6:18 ` Juergen Gross
@ 2009-03-09 7:45 ` Juergen Gross
2009-03-09 8:28 ` Keir Fraser
1 sibling, 0 replies; 5+ messages in thread
From: Juergen Gross @ 2009-03-09 7:45 UTC (permalink / raw)
To: Keir Fraser; +Cc: xen-devel@lists.xensource.com
Juergen Gross wrote:
> Keir Fraser wrote:
>> On 06/03/2009 08:46, "Juergen Gross" <juergen.gross@fujitsu-siemens.com>
>> wrote:
>>
>>> to dump the schedulers run-queues. On my 4-core machine I need only about 5
>>> calls to stop the machine.
>>>
>>> The attached patch solves the problem by avoiding sending the event in
>>> critical paths.
>> Ugly. Instead we can defer the dom0 notification to a tasklet. I'll make a
>> patch for that myself.
>
> Hmm, do you think your patch is okay?
> tasklet_schedule is taking another lock and uses BUG_ON then...
> I would suggest to modify tasklet_schedule:
>
> if ( !t->is_scheduled && !t->is_running )
> {
> if (!list_empty(&t->list))
> {
t->is_dead = 1;
> spin_unlock_irqrestore(&tasklet_lock, flags);
> BUG();
> }
> list_add_tail(&t->list, &tasklet_list);
> }
recursion should be avoided, of course!
Juergen
--
Juergen Gross Principal Developer
IP SW OS6 Telephone: +49 (0) 89 636 47950
Fujitsu Siemens Computers e-mail: juergen.gross@fujitsu-siemens.com
Otto-Hahn-Ring 6 Internet: www.fujitsu-siemens.com
D-81739 Muenchen Company details: www.fujitsu-siemens.com/imprint.html
^ permalink raw reply [flat|nested] 5+ messages in thread* Re: [Patch] avoid deadlock during console output
2009-03-09 6:18 ` Juergen Gross
2009-03-09 7:45 ` Juergen Gross
@ 2009-03-09 8:28 ` Keir Fraser
1 sibling, 0 replies; 5+ messages in thread
From: Keir Fraser @ 2009-03-09 8:28 UTC (permalink / raw)
To: Juergen Gross; +Cc: xen-devel@lists.xensource.com
On 09/03/2009 06:18, "Juergen Gross" <juergen.gross@fujitsu-siemens.com>
wrote:
> Hmm, do you think your patch is okay?
> tasklet_schedule is taking another lock and uses BUG_ON then...
> I would suggest to modify tasklet_schedule:
I can live with the error patch of BUG_ON() not working in this one case. I
don't think releasing the lock suffices anyway, as we'll just end up in a
recursive loop until the hypervisor stack overflows. The BUG_ON() here is
more for informative code annotation than because it's at all likely to
fire.
It perhaps makes sense to disable the tasklet_schedule() based on a flag set
in console_force_unlock(). That would at least allow NMI-based watchdog to
print if we did ever hit the BUG_ON() deadlock (or any other crash while the
tasklet lock is held).
-- Keir
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2009-03-09 8:28 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-03-06 8:46 [Patch] avoid deadlock during console output Juergen Gross
2009-03-06 9:11 ` Keir Fraser
2009-03-09 6:18 ` Juergen Gross
2009-03-09 7:45 ` Juergen Gross
2009-03-09 8:28 ` Keir Fraser
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.