WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [Patch] avoid deadlock during console output

To: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [Patch] avoid deadlock during console output
From: Juergen Gross <juergen.gross@xxxxxxxxxxxxxxxxxxx>
Date: Fri, 06 Mar 2009 09:46:40 +0100
Delivery-date: Fri, 06 Mar 2009 00:47:13 -0800
Domainkey-signature: s=s768; d=fujitsu-siemens.com; c=nofws; q=dns; h=X-SBRSScore:X-IronPort-AV:Received:X-IronPort-AV: Received:Received:Message-ID:Date:From:Organization: User-Agent:MIME-Version:To:Subject:X-Enigmail-Version: Content-Type; b=K7muPxBdm7EyvXa9OTnbmQHEPJc8N0tgE4a2Vvg3VPAePYebBzRbFOXT bVIuBY8okRMa0ZR5B4iTNN82WMfNvu2KuUg8WkXuk6mvec5xt5Hl3hwMj QMo0oTaRNEff/V3;
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Organization: Fujitsu Siemens Computers
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mozilla-Thunderbird 2.0.0.19 (X11/20090103)
Hi,

During my tests for cpupools I found an issue in console output.
Sometimes the hypervisor hangs due to a deadlock when something is printed
to the console via printk while a per-cpu scheduler lock is held by the printing
processor. Inside printk an event is sent to dom0, which in some cases leads to
a call of vcpu_wake, resulting in the deadlock.
This problem also occurs when BUG is called while holding the lock.
The issue is easily reproducible on a system with multiple cpus under low
load by calling

xm debug-keys r

to dump the schedulers run-queues. On my 4-core machine I need only about 5
calls to stop the machine.

The attached patch solves the problem by not sending the event in
critical paths.


Juergen

-- 
Juergen Gross                             Principal Developer
IP SW OS6                      Telephone: +49 (0) 89 636 47950
Fujitsu Siemens Computers         e-mail: juergen.gross@xxxxxxxxxxxxxxxxxxx
Otto-Hahn-Ring 6                Internet: www.fujitsu-siemens.com
D-81739 Muenchen         Company details: www.fujitsu-siemens.com/imprint.html
Signed-off-by: juergen.gross@xxxxxxxxxxxxxxxxxxx

# HG changeset patch
# User juergen.gross@xxxxxxxxxxxxxxxxxxx
# Date 1236328387 -3600
# Node ID 0a7f637315e43205425da88aff3899c8e1ff6d11
# Parent  6315b66fbd5b25597ad2aa766aeda68d6852205d
avoid deadlocks in console output

diff -r 6315b66fbd5b -r 0a7f637315e4 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c      Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/arch/x86/traps.c      Fri Mar 06 09:33:07 2009 +0100
@@ -389,6 +389,7 @@
     {
         watchdog_disable();
         console_start_sync();
+        console_enter_critical();
 
         show_execution_state(regs);
 
@@ -398,6 +399,7 @@
             printk("Faulting linear address: %p\n", _p(cr2));
             show_page_walk(cr2);
         }
+        console_exit_critical();
     }
 
     panic("FATAL TRAP: vector = %d (%s)\n"
@@ -545,7 +547,9 @@
 
     DEBUGGER_trap_fatal(trapnr, regs);
 
+    console_enter_critical();
     show_execution_state(regs);
+    console_exit_critical();
     panic("FATAL TRAP: vector = %d (%s)\n"
           "[error_code=%04x]\n",
           trapnr, trapstr(trapnr), regs->error_code);
@@ -866,7 +870,9 @@
 
     if ( id == BUGFRAME_dump )
     {
+        console_enter_critical();
         show_execution_state(regs);
+        console_exit_critical();
         regs->eip = (unsigned long)eip;
         return;
     }
@@ -883,17 +889,21 @@
 
     if ( id == BUGFRAME_warn )
     {
+        console_enter_critical();
         printk("Xen WARN at %.50s:%d\n", filename, lineno);
         show_execution_state(regs);
+        console_exit_critical();
         regs->eip = (unsigned long)eip;
         return;
     }
 
     if ( id == BUGFRAME_bug )
     {
+        console_enter_critical();
         printk("Xen BUG at %.50s:%d\n", filename, lineno);
         DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
         show_execution_state(regs);
+        console_exit_critical();
         panic("Xen BUG at %.50s:%d\n", filename, lineno);
     }
 
@@ -906,10 +916,12 @@
     eip += sizeof(bug_str);
 
     predicate = is_kernel(bug_str.str) ? (char *)bug_str.str : "<unknown>";
+    console_enter_critical();
     printk("Assertion '%s' failed at %.50s:%d\n",
            predicate, filename, lineno);
     DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
     show_execution_state(regs);
+    console_exit_critical();
     panic("Assertion '%s' failed at %.50s:%d\n",
           predicate, filename, lineno);
 
@@ -920,7 +932,9 @@
         return;
     }
     DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
+    console_enter_critical();
     show_execution_state(regs);
+    console_exit_critical();
     panic("FATAL TRAP: vector = %d (invalid opcode)\n", TRAP_invalid_op);
 }
 
@@ -945,10 +959,12 @@
 static void reserved_bit_page_fault(
     unsigned long addr, struct cpu_user_regs *regs)
 {
+    console_enter_critical();
     printk("d%d:v%d: reserved bit in page table (ec=%04X)\n",
            current->domain->domain_id, current->vcpu_id, regs->error_code);
     show_page_walk(addr);
     show_execution_state(regs);
+    console_exit_critical();
 }
 
 void propagate_page_fault(unsigned long addr, u16 error_code)
@@ -1247,8 +1263,10 @@
 
         DEBUGGER_trap_fatal(TRAP_page_fault, regs);
 
+        console_enter_critical();
         show_execution_state(regs);
         show_page_walk(addr);
+        console_exit_critical();
         panic("FATAL PAGE FAULT\n"
               "[error_code=%04x]\n"
               "Faulting linear address: %p\n",
@@ -2757,7 +2775,9 @@
     DEBUGGER_trap_fatal(TRAP_gp_fault, regs);
 
  hardware_gp:
+    console_enter_critical();
     show_execution_state(regs);
+    console_exit_critical();
     panic("GENERAL PROTECTION FAULT\n[error_code=%04x]\n", regs->error_code);
 }
 
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/common/schedule.c
--- a/xen/common/schedule.c     Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/common/schedule.c     Fri Mar 06 09:33:07 2009 +0100
@@ -930,10 +930,12 @@
 
     for_each_online_cpu ( i )
     {
+        console_enter_critical();
         spin_lock(&per_cpu(schedule_data, i).schedule_lock);
         printk("CPU[%02d] ", i);
         SCHED_OP(dump_cpu_state, i);
         spin_unlock(&per_cpu(schedule_data, i).schedule_lock);
+        console_exit_critical();
     }
 
     local_irq_restore(flags);
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c        Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/drivers/char/console.c        Fri Mar 06 09:33:07 2009 +0100
@@ -414,6 +414,22 @@
  * *****************************************************
  */
 
+/* don't try to wake up dom0 if schedule lock might be held, as this could
+   result in a deadlock! */
+
+static atomic_t console_crit_cnt = ATOMIC_INIT(0);
+
+void console_enter_critical(void)
+{
+    atomic_inc(&console_crit_cnt);
+}
+
+void console_exit_critical(void)
+{
+    BUG_ON(atomic_read(&console_crit_cnt) == 0);
+    atomic_dec(&console_crit_cnt);
+}
+
 static void __putstr(const char *str)
 {
     int c;
@@ -426,7 +442,8 @@
     while ( (c = *str++) != '\0' )
         putchar_console_ring(c);
 
-    send_guest_global_virq(dom0, VIRQ_CON_RING);
+    if (atomic_read(&console_crit_cnt) == 0)
+        send_guest_global_virq(dom0, VIRQ_CON_RING);
 }
 
 static int printk_prefix_check(char *p, char **pp)
@@ -915,6 +932,7 @@
     static DEFINE_SPINLOCK(lock);
     static char buf[128];
     
+    console_enter_critical();
     debugtrace_dump();
 
     /* Protects buf[] and ensure multi-line message prints atomically. */
@@ -935,6 +953,7 @@
         printk("Reboot in five seconds...\n");
 
     spin_unlock_irqrestore(&lock, flags);
+    console_exit_critical();
 
     debugger_trap_immediate();
 
@@ -953,17 +972,21 @@
 
 void __bug(char *file, int line)
 {
+    console_enter_critical();
     console_start_sync();
     printk("Xen BUG at %s:%d\n", file, line);
     dump_execution_state();
+    console_exit_critical();
     panic("Xen BUG at %s:%d\n", file, line);
     for ( ; ; ) ;
 }
 
 void __warn(char *file, int line)
 {
+    console_enter_critical();
     printk("Xen WARN at %s:%d\n", file, line);
     dump_execution_state();
+    console_exit_critical();
 }
 
 
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/include/xen/lib.h
--- a/xen/include/xen/lib.h     Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/include/xen/lib.h     Fri Mar 06 09:33:07 2009 +0100
@@ -100,4 +100,8 @@
 extern char *print_tainted(char *str);
 extern void add_taint(unsigned);
 
+/* avoid scheduling during console output in critical paths */
+void console_enter_critical(void);
+void console_exit_critical(void);
+
 #endif /* __LIB_H__ */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
<Prev in Thread] Current Thread [Next in Thread>