All of lore.kernel.org
 help / color / mirror / Atom feed
From: Juergen Gross <juergen.gross@fujitsu-siemens.com>
To: "xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>
Subject: [Patch] avoid deadlock during console output
Date: Fri, 06 Mar 2009 09:46:40 +0100	[thread overview]
Message-ID: <49B0E2F0.5060104@fujitsu-siemens.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 1080 bytes --]

Hi,

during my test for cpupools I've found an issue in console output.
Sometimes the hypervisor hangs up due to a deadlock if something is printed
to the console via printk if a per-cpu scheduler lock is held by the printing
processor. Inside printk an event is sent to dom0 which in some cases leads to
a call of vcpu_wake resulting in the deadlock.
This problem occurs when calling BUG during holding the lock, too.
This issue is easily reproducable on a system with multiple cpus under low
load by calling

xm debug-keys r

to dump the schedulers run-queues. On my 4-core machine I need only about 5
calls to stop the machine.

The attached patch solves the problem by avoiding sending the event in
critical paths.


Juergen

-- 
Juergen Gross                             Principal Developer
IP SW OS6                      Telephone: +49 (0) 89 636 47950
Fujitsu Siemens Computers         e-mail: juergen.gross@fujitsu-siemens.com
Otto-Hahn-Ring 6                Internet: www.fujitsu-siemens.com
D-81739 Muenchen         Company details: www.fujitsu-siemens.com/imprint.html

[-- Attachment #2: consout.patch --]
[-- Type: text/x-patch, Size: 6599 bytes --]

Signed-off-by: juergen.gross@fujitsu-siemens.com

# HG changeset patch
# User juergen.gross@fujitsu-siemens.com
# Date 1236328387 -3600
# Node ID 0a7f637315e43205425da88aff3899c8e1ff6d11
# Parent  6315b66fbd5b25597ad2aa766aeda68d6852205d
avoid deadlocks in console output

diff -r 6315b66fbd5b -r 0a7f637315e4 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c	Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/arch/x86/traps.c	Fri Mar 06 09:33:07 2009 +0100
@@ -389,6 +389,7 @@
     {
         watchdog_disable();
         console_start_sync();
+        console_enter_critical();
 
         show_execution_state(regs);
 
@@ -398,6 +399,7 @@
             printk("Faulting linear address: %p\n", _p(cr2));
             show_page_walk(cr2);
         }
+        console_exit_critical();
     }
 
     panic("FATAL TRAP: vector = %d (%s)\n"
@@ -545,7 +547,9 @@
 
     DEBUGGER_trap_fatal(trapnr, regs);
 
+    console_enter_critical();
     show_execution_state(regs);
+    console_exit_critical();
     panic("FATAL TRAP: vector = %d (%s)\n"
           "[error_code=%04x]\n",
           trapnr, trapstr(trapnr), regs->error_code);
@@ -866,7 +870,9 @@
 
     if ( id == BUGFRAME_dump )
     {
+        console_enter_critical();
         show_execution_state(regs);
+        console_exit_critical();
         regs->eip = (unsigned long)eip;
         return;
     }
@@ -883,17 +889,21 @@
 
     if ( id == BUGFRAME_warn )
     {
+        console_enter_critical();
         printk("Xen WARN at %.50s:%d\n", filename, lineno);
         show_execution_state(regs);
+        console_exit_critical();
         regs->eip = (unsigned long)eip;
         return;
     }
 
     if ( id == BUGFRAME_bug )
     {
+        console_enter_critical();
         printk("Xen BUG at %.50s:%d\n", filename, lineno);
         DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
         show_execution_state(regs);
+        console_exit_critical();
         panic("Xen BUG at %.50s:%d\n", filename, lineno);
     }
 
@@ -906,10 +916,12 @@
     eip += sizeof(bug_str);
 
     predicate = is_kernel(bug_str.str) ? (char *)bug_str.str : "<unknown>";
+    console_enter_critical();
     printk("Assertion '%s' failed at %.50s:%d\n",
            predicate, filename, lineno);
     DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
     show_execution_state(regs);
+    console_exit_critical();
     panic("Assertion '%s' failed at %.50s:%d\n",
           predicate, filename, lineno);
 
@@ -920,7 +932,9 @@
         return;
     }
     DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
+    console_enter_critical();
     show_execution_state(regs);
+    console_exit_critical();
     panic("FATAL TRAP: vector = %d (invalid opcode)\n", TRAP_invalid_op);
 }
 
@@ -945,10 +959,12 @@
 static void reserved_bit_page_fault(
     unsigned long addr, struct cpu_user_regs *regs)
 {
+    console_enter_critical();
     printk("d%d:v%d: reserved bit in page table (ec=%04X)\n",
            current->domain->domain_id, current->vcpu_id, regs->error_code);
     show_page_walk(addr);
     show_execution_state(regs);
+    console_exit_critical();
 }
 
 void propagate_page_fault(unsigned long addr, u16 error_code)
@@ -1247,8 +1263,10 @@
 
         DEBUGGER_trap_fatal(TRAP_page_fault, regs);
 
+        console_enter_critical();
         show_execution_state(regs);
         show_page_walk(addr);
+        console_exit_critical();
         panic("FATAL PAGE FAULT\n"
               "[error_code=%04x]\n"
               "Faulting linear address: %p\n",
@@ -2757,7 +2775,9 @@
     DEBUGGER_trap_fatal(TRAP_gp_fault, regs);
 
  hardware_gp:
+    console_enter_critical();
     show_execution_state(regs);
+    console_exit_critical();
     panic("GENERAL PROTECTION FAULT\n[error_code=%04x]\n", regs->error_code);
 }
 
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/common/schedule.c
--- a/xen/common/schedule.c	Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/common/schedule.c	Fri Mar 06 09:33:07 2009 +0100
@@ -930,10 +930,12 @@
 
     for_each_online_cpu ( i )
     {
+        console_enter_critical();
         spin_lock(&per_cpu(schedule_data, i).schedule_lock);
         printk("CPU[%02d] ", i);
         SCHED_OP(dump_cpu_state, i);
         spin_unlock(&per_cpu(schedule_data, i).schedule_lock);
+        console_exit_critical();
     }
 
     local_irq_restore(flags);
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c	Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/drivers/char/console.c	Fri Mar 06 09:33:07 2009 +0100
@@ -414,6 +414,22 @@
  * *****************************************************
  */
 
+/* don't try to wake up dom0 if schedule lock might be held, as this could
+   result in a deadlock! */
+
+static atomic_t console_crit_cnt = ATOMIC_INIT(0);
+
+void console_enter_critical(void)
+{
+    atomic_inc(&console_crit_cnt);
+}
+
+void console_exit_critical(void)
+{
+    BUG_ON(atomic_read(&console_crit_cnt) == 0);
+    atomic_dec(&console_crit_cnt);
+}
+
 static void __putstr(const char *str)
 {
     int c;
@@ -426,7 +442,8 @@
     while ( (c = *str++) != '\0' )
         putchar_console_ring(c);
 
-    send_guest_global_virq(dom0, VIRQ_CON_RING);
+    if (atomic_read(&console_crit_cnt) == 0)
+        send_guest_global_virq(dom0, VIRQ_CON_RING);
 }
 
 static int printk_prefix_check(char *p, char **pp)
@@ -915,6 +932,7 @@
     static DEFINE_SPINLOCK(lock);
     static char buf[128];
     
+    console_enter_critical();
     debugtrace_dump();
 
     /* Protects buf[] and ensure multi-line message prints atomically. */
@@ -935,6 +953,7 @@
         printk("Reboot in five seconds...\n");
 
     spin_unlock_irqrestore(&lock, flags);
+    console_exit_critical();
 
     debugger_trap_immediate();
 
@@ -953,17 +972,21 @@
 
 void __bug(char *file, int line)
 {
+    console_enter_critical();
     console_start_sync();
     printk("Xen BUG at %s:%d\n", file, line);
     dump_execution_state();
+    console_exit_critical();
     panic("Xen BUG at %s:%d\n", file, line);
     for ( ; ; ) ;
 }
 
 void __warn(char *file, int line)
 {
+    console_enter_critical();
     printk("Xen WARN at %s:%d\n", file, line);
     dump_execution_state();
+    console_exit_critical();
 }
 
 
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/include/xen/lib.h
--- a/xen/include/xen/lib.h	Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/include/xen/lib.h	Fri Mar 06 09:33:07 2009 +0100
@@ -100,4 +100,8 @@
 extern char *print_tainted(char *str);
 extern void add_taint(unsigned);
 
+/* avoid scheduling during console output in critical paths */
+void console_enter_critical(void);
+void console_exit_critical(void);
+
 #endif /* __LIB_H__ */

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

             reply	other threads:[~2009-03-06  8:46 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-03-06  8:46 Juergen Gross [this message]
2009-03-06  9:11 ` [Patch] avoid deadlock during console output Keir Fraser
2009-03-09  6:18   ` Juergen Gross
2009-03-09  7:45     ` Juergen Gross
2009-03-09  8:28     ` Keir Fraser

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=49B0E2F0.5060104@fujitsu-siemens.com \
    --to=juergen.gross@fujitsu-siemens.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.