[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [Patch] avoid deadlock during console output


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
  • From: Juergen Gross <juergen.gross@xxxxxxxxxxxxxxxxxxx>
  • Date: Fri, 06 Mar 2009 09:46:40 +0100
  • Delivery-date: Fri, 06 Mar 2009 00:47:13 -0800
  • Domainkey-signature: s=s768; d=fujitsu-siemens.com; c=nofws; q=dns; h=X-SBRSScore:X-IronPort-AV:Received:X-IronPort-AV: Received:Received:Message-ID:Date:From:Organization: User-Agent:MIME-Version:To:Subject:X-Enigmail-Version: Content-Type; b=K7muPxBdm7EyvXa9OTnbmQHEPJc8N0tgE4a2Vvg3VPAePYebBzRbFOXT bVIuBY8okRMa0ZR5B4iTNN82WMfNvu2KuUg8WkXuk6mvec5xt5Hl3hwMj QMo0oTaRNEff/V3;
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>

Hi,

While testing cpupools I found an issue in console output.
Sometimes the hypervisor hangs due to a deadlock when something is printed
to the console via printk while the printing processor holds a per-cpu
scheduler lock. Inside printk an event is sent to dom0, which in some cases
leads to a call of vcpu_wake, resulting in the deadlock.
This problem also occurs when BUG is called while the lock is held.
The issue is easily reproducible on a system with multiple CPUs under low
load by calling

xm debug-keys r

to dump the scheduler's run-queues. On my 4-core machine it takes only about 5
calls to hang the system.

The attached patch solves the problem by avoiding sending the event in
critical paths.


Juergen

-- 
Juergen Gross                             Principal Developer
IP SW OS6                      Telephone: +49 (0) 89 636 47950
Fujitsu Siemens Computers         e-mail: juergen.gross@xxxxxxxxxxxxxxxxxxx
Otto-Hahn-Ring 6                Internet: www.fujitsu-siemens.com
D-81739 Muenchen         Company details: www.fujitsu-siemens.com/imprint.html
Signed-off-by: juergen.gross@xxxxxxxxxxxxxxxxxxx

# HG changeset patch
# User juergen.gross@xxxxxxxxxxxxxxxxxxx
# Date 1236328387 -3600
# Node ID 0a7f637315e43205425da88aff3899c8e1ff6d11
# Parent  6315b66fbd5b25597ad2aa766aeda68d6852205d
avoid deadlocks in console output

diff -r 6315b66fbd5b -r 0a7f637315e4 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/arch/x86/traps.c      Fri Mar 06 09:33:07 2009 +0100
@@ -389,6 +389,7 @@
     {
         watchdog_disable();
         console_start_sync();
+        console_enter_critical();
 
         show_execution_state(regs);
 
@@ -398,6 +399,7 @@
             printk("Faulting linear address: %p\n", _p(cr2));
             show_page_walk(cr2);
         }
+        console_exit_critical();
     }
 
     panic("FATAL TRAP: vector = %d (%s)\n"
@@ -545,7 +547,9 @@
 
     DEBUGGER_trap_fatal(trapnr, regs);
 
+    console_enter_critical();
     show_execution_state(regs);
+    console_exit_critical();
     panic("FATAL TRAP: vector = %d (%s)\n"
           "[error_code=%04x]\n",
           trapnr, trapstr(trapnr), regs->error_code);
@@ -866,7 +870,9 @@
 
     if ( id == BUGFRAME_dump )
     {
+        console_enter_critical();
         show_execution_state(regs);
+        console_exit_critical();
         regs->eip = (unsigned long)eip;
         return;
     }
@@ -883,17 +889,21 @@
 
     if ( id == BUGFRAME_warn )
     {
+        console_enter_critical();
         printk("Xen WARN at %.50s:%d\n", filename, lineno);
         show_execution_state(regs);
+        console_exit_critical();
         regs->eip = (unsigned long)eip;
         return;
     }
 
     if ( id == BUGFRAME_bug )
     {
+        console_enter_critical();
         printk("Xen BUG at %.50s:%d\n", filename, lineno);
         DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
         show_execution_state(regs);
+        console_exit_critical();
         panic("Xen BUG at %.50s:%d\n", filename, lineno);
     }
 
@@ -906,10 +916,12 @@
     eip += sizeof(bug_str);
 
     predicate = is_kernel(bug_str.str) ? (char *)bug_str.str : "<unknown>";
+    console_enter_critical();
     printk("Assertion '%s' failed at %.50s:%d\n",
            predicate, filename, lineno);
     DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
     show_execution_state(regs);
+    console_exit_critical();
     panic("Assertion '%s' failed at %.50s:%d\n",
           predicate, filename, lineno);
 
@@ -920,7 +932,9 @@
         return;
     }
     DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
+    console_enter_critical();
     show_execution_state(regs);
+    console_exit_critical();
     panic("FATAL TRAP: vector = %d (invalid opcode)\n", TRAP_invalid_op);
 }
 
@@ -945,10 +959,12 @@
 static void reserved_bit_page_fault(
     unsigned long addr, struct cpu_user_regs *regs)
 {
+    console_enter_critical();
     printk("d%d:v%d: reserved bit in page table (ec=%04X)\n",
            current->domain->domain_id, current->vcpu_id, regs->error_code);
     show_page_walk(addr);
     show_execution_state(regs);
+    console_exit_critical();
 }
 
 void propagate_page_fault(unsigned long addr, u16 error_code)
@@ -1247,8 +1263,10 @@
 
         DEBUGGER_trap_fatal(TRAP_page_fault, regs);
 
+        console_enter_critical();
         show_execution_state(regs);
         show_page_walk(addr);
+        console_exit_critical();
         panic("FATAL PAGE FAULT\n"
               "[error_code=%04x]\n"
               "Faulting linear address: %p\n",
@@ -2757,7 +2775,9 @@
     DEBUGGER_trap_fatal(TRAP_gp_fault, regs);
 
  hardware_gp:
+    console_enter_critical();
     show_execution_state(regs);
+    console_exit_critical();
     panic("GENERAL PROTECTION FAULT\n[error_code=%04x]\n", regs->error_code);
 }
 
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/common/schedule.c
--- a/xen/common/schedule.c     Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/common/schedule.c     Fri Mar 06 09:33:07 2009 +0100
@@ -930,10 +930,12 @@
 
     for_each_online_cpu ( i )
     {
+        console_enter_critical();
         spin_lock(&per_cpu(schedule_data, i).schedule_lock);
         printk("CPU[%02d] ", i);
         SCHED_OP(dump_cpu_state, i);
         spin_unlock(&per_cpu(schedule_data, i).schedule_lock);
+        console_exit_critical();
     }
 
     local_irq_restore(flags);
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/drivers/char/console.c        Fri Mar 06 09:33:07 2009 +0100
@@ -414,6 +414,22 @@
  * *****************************************************
  */
 
+/* don't try to wake up dom0 if schedule lock might be held, as this could
+   result in a deadlock! */
+
+static atomic_t console_crit_cnt = ATOMIC_INIT(0);
+
+void console_enter_critical(void)
+{
+    atomic_inc(&console_crit_cnt);
+}
+
+void console_exit_critical(void)
+{
+    BUG_ON(atomic_read(&console_crit_cnt) == 0);
+    atomic_dec(&console_crit_cnt);
+}
+
 static void __putstr(const char *str)
 {
     int c;
@@ -426,7 +442,8 @@
     while ( (c = *str++) != '\0' )
         putchar_console_ring(c);
 
-    send_guest_global_virq(dom0, VIRQ_CON_RING);
+    if (atomic_read(&console_crit_cnt) == 0)
+        send_guest_global_virq(dom0, VIRQ_CON_RING);
 }
 
 static int printk_prefix_check(char *p, char **pp)
@@ -915,6 +932,7 @@
     static DEFINE_SPINLOCK(lock);
     static char buf[128];
     
+    console_enter_critical();
     debugtrace_dump();
 
     /* Protects buf[] and ensure multi-line message prints atomically. */
@@ -935,6 +953,7 @@
         printk("Reboot in five seconds...\n");
 
     spin_unlock_irqrestore(&lock, flags);
+    console_exit_critical();
 
     debugger_trap_immediate();
 
@@ -953,17 +972,21 @@
 
 void __bug(char *file, int line)
 {
+    console_enter_critical();
     console_start_sync();
     printk("Xen BUG at %s:%d\n", file, line);
     dump_execution_state();
+    console_exit_critical();
     panic("Xen BUG at %s:%d\n", file, line);
     for ( ; ; ) ;
 }
 
 void __warn(char *file, int line)
 {
+    console_enter_critical();
     printk("Xen WARN at %s:%d\n", file, line);
     dump_execution_state();
+    console_exit_critical();
 }
 
 
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/include/xen/lib.h
--- a/xen/include/xen/lib.h     Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/include/xen/lib.h     Fri Mar 06 09:33:07 2009 +0100
@@ -100,4 +100,8 @@
 extern char *print_tainted(char *str);
 extern void add_taint(unsigned);
 
+/* avoid scheduling during console output in critical paths */
+void console_enter_critical(void);
+void console_exit_critical(void);
+
 #endif /* __LIB_H__ */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.